ant private-javadocs runs without warnings; cleaned up some
as-yet-unused code.
This commit is contained in:
parent
644c0d3801
commit
6636d03a41
9 changed files with 158 additions and 145 deletions
|
@ -19,7 +19,7 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.tib.text;
|
||||
|
||||
/** This is where basic, static knowledge of THDL's Extended Wylie is housed.
|
||||
* @see org.thdl.tib.text#TibetanMachineWeb */
|
||||
* @see TibetanMachineWeb */
|
||||
public interface THDLWylieConstants {
|
||||
/**
|
||||
* the Wylie for bindu/anusvara
|
||||
|
|
|
@ -146,7 +146,7 @@ public final class LegalTshegBar
|
|||
* must not be absent. To learn about the arguments, and to be
|
||||
* sure that your input won't cause an exception to be thrown,
|
||||
* see {@link
|
||||
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}.
|
||||
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}.
|
||||
*
|
||||
* @exception IllegalArgumentException if the rootLetter is not
|
||||
* one of the thirty consonants (and represented nominally, at
|
||||
|
@ -712,7 +712,7 @@ public final class LegalTshegBar
|
|||
* @exception IllegalArgumentException if the syllable does not
|
||||
* follow the rules of a Tibetan syllable. To learn about the
|
||||
* arguments, see {@link
|
||||
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}. */
|
||||
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}. */
|
||||
private static void throwIfNotLegalTshegBar(char prefix,
|
||||
char headLetter,
|
||||
char rootLetter,
|
||||
|
@ -745,7 +745,7 @@ public final class LegalTshegBar
|
|||
/** If you get through this gauntlet without having an exception
|
||||
* thrown, then this combination makes a legal Tibetan syllable.
|
||||
* To learn about the arguments, see {@link
|
||||
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}.
|
||||
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}.
|
||||
* @param errorBuf if non-null, the reason this is illegal will
|
||||
* be written here, if this is illegal
|
||||
* @return true if this syllable is legal, false if this syllable
|
||||
|
@ -1257,13 +1257,13 @@ public final class LegalTshegBar
|
|||
* combination of superscribed, root, and subscribed letters)
|
||||
* takes an EWC_ga prefix.
|
||||
* @param head the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the superscribed letter, or EW_ABSENT if
|
||||
* not present
|
||||
* @param root the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the root letter
|
||||
* @param sub the {@link isNominalRepresentationOfConsonant(char)
|
||||
* @param sub the {@link #isNominalRepresentationOfConsonant(char)
|
||||
* nominal representation} of the subjoined letter, or EW_ABSENT
|
||||
* if not present */
|
||||
static boolean takesGao(char head, char root, char sub) {
|
||||
|
@ -1289,13 +1289,13 @@ public final class LegalTshegBar
|
|||
* combination of superscribed, root, and subscribed letters)
|
||||
* takes an EWC_da prefix.
|
||||
* @param head the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the superscribed letter, or EW_ABSENT if
|
||||
* not present
|
||||
* @param root the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the root letter
|
||||
* @param sub the {@link isNominalRepresentationOfConsonant(char)
|
||||
* @param sub the {@link #isNominalRepresentationOfConsonant(char)
|
||||
* nominal representation} of the subjoined letter, or EW_ABSENT
|
||||
* if not present */
|
||||
static boolean takesDao(char head, char root, char sub) {
|
||||
|
@ -1327,13 +1327,13 @@ public final class LegalTshegBar
|
|||
* combination of superscribed, root, and subscribed letters)
|
||||
* takes an EWC_achung prefix.
|
||||
* @param head the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the superscribed letter, or EW_ABSENT if
|
||||
* not present
|
||||
* @param root the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the root letter
|
||||
* @param sub the {@link isNominalRepresentationOfConsonant(char)
|
||||
* @param sub the {@link #isNominalRepresentationOfConsonant(char)
|
||||
* nominal representation} of the subjoined letter, or EW_ABSENT
|
||||
* if not present */
|
||||
static boolean takesAchungPrefix(char head, char root, char sub) {
|
||||
|
@ -1370,13 +1370,13 @@ public final class LegalTshegBar
|
|||
* combination of superscribed, root, and subscribed letters)
|
||||
* takes an EWC_ma prefix.
|
||||
* @param head the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the superscribed letter, or EW_ABSENT if
|
||||
* not present
|
||||
* @param root the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the root letter
|
||||
* @param sub the {@link isNominalRepresentationOfConsonant(char)
|
||||
* @param sub the {@link #isNominalRepresentationOfConsonant(char)
|
||||
* nominal representation} of the subjoined letter, or EW_ABSENT
|
||||
* if not present */
|
||||
static boolean takesMao(char head, char root, char sub) {
|
||||
|
@ -1409,13 +1409,13 @@ public final class LegalTshegBar
|
|||
* combination of superscribed, root, and subscribed letters)
|
||||
* takes an EWC_ba prefix.
|
||||
* @param head the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the superscribed letter, or EW_ABSENT if
|
||||
* not present
|
||||
* @param root the {@link
|
||||
* isNominalRepresentationOfConsonant(char) nominal
|
||||
* #isNominalRepresentationOfConsonant(char) nominal
|
||||
* representation} of the root letter
|
||||
* @param sub the {@link isNominalRepresentationOfConsonant(char)
|
||||
* @param sub the {@link #isNominalRepresentationOfConsonant(char)
|
||||
* nominal representation} of the subjoined letter, or EW_ABSENT
|
||||
* if not present */
|
||||
static boolean takesBao(char head, char root, char sub) {
|
||||
|
|
|
@ -18,6 +18,8 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.text.tshegbar;
|
||||
|
||||
import java.util.Vector;
|
||||
|
||||
/** DLC FIXMEDOC: says "this isn't legal Tibetan", not "this isn't a valid sequence of Unicode" */
|
||||
class TibetanSyntaxException extends Exception {
|
||||
/** This constructor creates an exception with a less than helpful
|
||||
|
@ -46,6 +48,6 @@ class TibetanSyntaxException extends Exception {
|
|||
GraphemeCluster in the syntactically incorrect stretch of
|
||||
Tibetan. */
|
||||
TibetanSyntaxException(Vector grcls, int start, int end) {
|
||||
DLC NOW;
|
||||
throw new Error("DLC NOW");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,8 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.text.tshegbar;
|
||||
|
||||
import org.thdl.util.ThdlDebug;
|
||||
|
||||
|
||||
/** DLC FIXMEDOC */
|
||||
class TransitionInstruction implements UnicodeReadingStateMachineConstants {
|
||||
|
@ -25,19 +27,19 @@ class TransitionInstruction implements UnicodeReadingStateMachineConstants {
|
|||
TransitionInstruction(int nextState, int action) {
|
||||
super();
|
||||
|
||||
assert(action == ACTION_CONTINUES_GRAPHEME_CLUSTER
|
||||
|| action == ACTION_BEGINS_NEW_GRAPHEME_CLUSTER
|
||||
|| action == ACTION_PREPEND_WITH_0F68);
|
||||
ThdlDebug.verify(action == ACTION_CONTINUES_GRAPHEME_CLUSTER
|
||||
|| action == ACTION_BEGINS_NEW_GRAPHEME_CLUSTER
|
||||
|| action == ACTION_PREPEND_WITH_0F68); // DLC FIXME: assert this.
|
||||
|
||||
assert(nextState == STATE_START
|
||||
|| nextState == STATE_READY
|
||||
|| nextState == STATE_DIGIT
|
||||
|| nextState == STATE_STACKING
|
||||
|| nextState == STATE_STACKPLUSACHUNG
|
||||
|| nextState == STATE_PARTIALMARK);
|
||||
ThdlDebug.verify(nextState == STATE_START
|
||||
|| nextState == STATE_READY
|
||||
|| nextState == STATE_DIGIT
|
||||
|| nextState == STATE_STACKING
|
||||
|| nextState == STATE_STACKPLUSACHUNG
|
||||
|| nextState == STATE_PARTIALMARK); // DLC FIXME: assert this.
|
||||
|
||||
// we start in the start state, but we can never return to it.
|
||||
assert(nextState != STATE_START);
|
||||
ThdlDebug.verify(nextState != STATE_START); // DLC FIXME: assert this.
|
||||
|
||||
this.nextState = nextState;
|
||||
this.action = action;
|
||||
|
@ -55,4 +57,60 @@ class TransitionInstruction implements UnicodeReadingStateMachineConstants {
|
|||
|
||||
/** Returns the action that was passed to this instruction's
 *  constructor. */
int getAction() {
    return this.action;
}
|
||||
/** Returns the next state that was passed to this instruction's
 *  constructor. */
int getNextState() {
    return this.nextState;
}
|
||||
|
||||
|
||||
/** Returns the codepoint class for cp, e.g. {@link
|
||||
* UnicodeReadingStateMachineConstants#CC_SJC}.
|
||||
* @param cp a Unicode codepoint, which MUST be nondecomposable
|
||||
* if it is in the Tibetan range but can be from outside the
|
||||
* Tibetan range of Unicode */
|
||||
static int getCCForCP(char cp) {
|
||||
ThdlDebug.verify(getNFTHDL(cp) == null); // DLC FIXME: assert this
|
||||
if ('\u0F82' == cp) {
|
||||
return CC_0F82;
|
||||
} else if ('\u0F8A' == cp) {
|
||||
return CC_0F8A;
|
||||
} else if ('\u0F39' == cp) {
|
||||
return CC_0F39;
|
||||
} else if ('\u0F71' == cp) {
|
||||
return CC_SUBSCRIBED_ACHUNG;
|
||||
} else if ('\u0F40' <= cp && cp <= '\u0F6A') {
|
||||
ThdlDebug.verify(cp != '\u0F48'); // DLC FIXME: assert this
|
||||
return CC_CON;
|
||||
} else if ('\u0F90' <= cp && cp <= '\u0FBC') {
|
||||
ThdlDebug.verify(cp != '\u0F98'); // DLC FIXME: assert this
|
||||
return CC_SJC;
|
||||
} else if ('\u0F20' <= cp && cp <= '\u0F33') {
|
||||
return CC_DIGIT;
|
||||
} else if (/* DLC NOW do these combine ONLY with digits, or do CC_CM just NOT combine with digits? */
|
||||
'\u0F3E' == cp
|
||||
|| '\u0F3F' == cp
|
||||
|| '\u0F18' == cp
|
||||
|| '\u0F19' == cp) {
|
||||
return CC_MCWD;
|
||||
} else if ('\u0FC6' == cp
|
||||
|| '\u0F87' == cp
|
||||
|| '\u0F86' == cp
|
||||
|| '\u0F84' == cp
|
||||
|| '\u0F83' == cp
|
||||
|| '\u0F82' == cp
|
||||
|| '\u0F7F' == cp
|
||||
|| '\u0F7E' == cp
|
||||
|| '\u0F37' == cp /* DLC NOW NORMALIZATION OF 0F10, 11 to 0F0F ??? */
|
||||
|| '\u0F35' == cp) {
|
||||
return CC_CM;
|
||||
} else if ('\u0F72' == cp
|
||||
|| '\u0F74' == cp
|
||||
|| '\u0F7A' == cp
|
||||
|| '\u0F7B' == cp
|
||||
|| '\u0F7C' == cp
|
||||
|| '\u0F7D' == cp
|
||||
|| '\u0F80' == cp) {
|
||||
// DLC what about U+0F84 ??? CC_V or CC_CM ?
|
||||
return CC_V;
|
||||
} else {
|
||||
return CC_SIN;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -387,7 +387,7 @@ public class UnicodeGraphemeCluster
|
|||
throw new Error("DLC FIXME");
|
||||
}
|
||||
|
||||
// DLC NOW -- LegalSyllable doesn't handle digits w/ underlining, etc.
|
||||
// DLC NOW -- LegalTshegBar doesn't handle digits w/ underlining, etc.
|
||||
|
||||
/** If this is a Tibetan consonant stack, this returns the root
|
||||
* letter. If this is a Tibetan digit (perhaps with other
|
||||
|
|
|
@ -32,6 +32,7 @@ import java.io.InputStream;
|
|||
public class UnicodeReader {
|
||||
/** You cannot instantiate this class. */
|
||||
private UnicodeReader() { }
|
||||
// DLC NOW
|
||||
|
||||
// public static TTBIR parsePerfectUnicode() {
|
||||
// }
|
||||
|
|
|
@ -18,73 +18,22 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.text.tshegbar;
|
||||
|
||||
/** Constants and static routines (DLC still?) useful in writing state
|
||||
* machines for transforming Unicode input into other forms.
|
||||
/** Constants useful in writing state machines for transforming
|
||||
* Unicode input into other forms.
|
||||
*
|
||||
* @see TransitionInstruction#getCCForCP(char)
|
||||
*
|
||||
* @author David Chandler
|
||||
*/
|
||||
interface UnicodeReadingStateMachineConstants {
|
||||
|
||||
/** Returns the codepoint class for cp, e.g. {@link #CC_SJC}.
|
||||
* @param cp a Unicode codepoint, which MUST be nondecomposable
|
||||
* if it is in the Tibetan range but can be from outside the
|
||||
* Tibetan range of Unicode */
|
||||
static int getCCForCP(char cp) {
|
||||
assert(getNFTHDL(cp) == null);
|
||||
if ('\u0F82' == cp) {
|
||||
return CC_0F82;
|
||||
} else if ('\u0F8A' == cp) {
|
||||
return CC_0F8A;
|
||||
} else if ('\u0F39' == cp) {
|
||||
return CC_0F39;
|
||||
} else if ('\u0F71' == cp) {
|
||||
return CC_ACHUNG;
|
||||
} else if ('\u0F40' <= cp && cp <= '\u0F6A') {
|
||||
assert(cp != '\u0F48');
|
||||
return CC_CON;
|
||||
} else if ('\u0F90' <= cp && cp <= '\u0FBC') {
|
||||
assert(cp != '\u0F98');
|
||||
return CC_SJC;
|
||||
} else if ('\u0F20' <= cp && cp <= '\u0F33') {
|
||||
return CC_DIGIT;
|
||||
} else if (/* DLC NOW do these combine ONLY with digits, or do CC_CM just NOT combine with digits? */
|
||||
'\u0F3E' == cp
|
||||
|| '\u0F3F' == cp
|
||||
|| '\u0F18' == cp
|
||||
|| '\u0F19' == cp) {
|
||||
return CC_MCWD;
|
||||
} else if ('\u0FC6' == cp
|
||||
|| '\u0F87' == cp
|
||||
|| '\u0F86' == cp
|
||||
|| '\u0F84' == cp
|
||||
|| '\u0F83' == cp
|
||||
|| '\u0F82' == cp
|
||||
|| '\u0F7F' == cp
|
||||
|| '\u0F7E' == cp
|
||||
|| '\u0F37' == cp /* DLC NOW NORMALIZATION OF 0F10, 11 to 0F0F ??? */
|
||||
|| '\u0F35' == cp) {
|
||||
return CC_CM;
|
||||
} else if ('\u0F72' == cp
|
||||
|| '\u0F74' == cp
|
||||
|| '\u0F7A' == cp
|
||||
|| '\u0F7B' == cp
|
||||
|| '\u0F7C' == cp
|
||||
|| '\u0F7D' == cp
|
||||
|| '\u0F80' == cp) {
|
||||
// DLC what about U+0F84 ??? CC_V or CC_CM ?
|
||||
return CC_V;
|
||||
} else {
|
||||
return CC_SIN;
|
||||
}
|
||||
}
|
||||
|
||||
// codepoint classes (CC_...) follow. These are mutually
|
||||
// Codepoint classes (CC_...) follow. These are mutually
|
||||
// exclusive, and their union is the whole of Unicode.
|
||||
|
||||
/** for everything else, i.e. non-Tibetan characters like U+0E00
|
||||
* and also Tibetan characters like U+0FCF and U+0F05 (DLC rename
|
||||
* SIN[GLETON] to OTHER as combining marks from outside the
|
||||
* Tibetan range count as this) but not U+0F8A */
|
||||
/** for everything else, that is to say non-Tibetan characters
|
||||
* like U+0E00 and also Tibetan characters like U+0FCF and U+0F05
|
||||
* (DLC rename SIN[GLETON] to OTHER as combining marks from
|
||||
* outside the Tibetan range count as this) but not U+0F8A */
|
||||
static final int CC_SIN = 0;
|
||||
|
||||
/** for combining marks in the Tibetan range of Unicode that
|
||||
|
@ -95,18 +44,18 @@ interface UnicodeReadingStateMachineConstants {
|
|||
* CC_MCWD, U+0F82, and U+0F39 */
|
||||
static final int CC_CM = 2;
|
||||
|
||||
/** for combining consonants, i.e. U+0F90-U+0FBC minus U+0F98
|
||||
* minus the decomposable entries like U+0F93, U+0F9D, U+0FA2,
|
||||
* etc. */
|
||||
/** for combining consonants, that is to say U+0F90-U+0FBC minus
|
||||
* U+0F98 minus the decomposable entries like U+0F93, U+0F9D,
|
||||
* U+0FA2, etc. */
|
||||
static final int CC_SJC = 3;
|
||||
|
||||
/** for noncombining consonants, i.e. U+0F40-U+0F6A minus U+0F48
|
||||
* minus the decomposable entries like U+0F43, U+0F4D, U+0F52,
|
||||
* etc. */
|
||||
/** for noncombining consonants, that is to say U+0F40-U+0F6A
|
||||
* minus U+0F48 minus the decomposable entries like U+0F43,
|
||||
* U+0F4D, U+0F52, etc. */
|
||||
static final int CC_CON = 4;
|
||||
|
||||
/** for simple, nondecomposable vowels, i.e. U+0F72, U+0F74,
|
||||
* U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */
|
||||
/** for simple, nondecomposable vowels, that is to say U+0F72,
|
||||
* U+0F74, U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */
|
||||
static final int CC_V = 5;
|
||||
|
||||
/** for U+0F8A */
|
||||
|
@ -121,9 +70,9 @@ interface UnicodeReadingStateMachineConstants {
|
|||
static final int CC_0F39 = 8;
|
||||
|
||||
/** for U+0F71 */
|
||||
static final int CC_ACHUNG = 9;
|
||||
static final int CC_SUBSCRIBED_ACHUNG = 9;
|
||||
|
||||
/** for digits, i.e. U+0F20-U+0F33 */
|
||||
/** for digits, that is to say U+0F20-U+0F33 */
|
||||
static final int CC_DIGIT = 10;
|
||||
|
||||
|
||||
|
@ -133,14 +82,14 @@ interface UnicodeReadingStateMachineConstants {
|
|||
/** initial state */
|
||||
static final int STATE_START = 0;
|
||||
|
||||
/** ready state, i.e. the state in which some non-empty Unicode
|
||||
* String is in the holding area, <i>ready</i> to receive
|
||||
/** ready state, that is to say the state in which some non-empty
|
||||
* Unicode String is in the holding area, <i>ready</i> to receive
|
||||
* combining marks like U+0F35 */
|
||||
static final int STATE_READY = 1;
|
||||
|
||||
/** digit state, i.e. the state in which some non-empty Unicode
|
||||
* String consisting entirely of digits is in the holding area,
|
||||
* ready to receive marks that combine only with digits */
|
||||
/** digit state, that is to say the state in which some non-empty
|
||||
* Unicode String consisting entirely of digits is in the holding
|
||||
* area, ready to receive marks that combine only with digits */
|
||||
static final int STATE_DIGIT = 2;
|
||||
|
||||
/** state in which CC_SJC are welcomed and treated as consonants
|
||||
|
|
|
@ -18,14 +18,17 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.text.tshegbar;
|
||||
|
||||
import java.util.Vector;
|
||||
|
||||
class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
||||
/** Don't instantiate this class. */
|
||||
private Foo() { super(); }
|
||||
private ValidatingUnicodeReader() { super(); }
|
||||
|
||||
/** This table tells how to transition from state a 6 states + error state */
|
||||
/** This table tells how to transition from state to state upon
|
||||
* encountering certain classes of Unicode codepoints. There are
|
||||
* 6 legal states + an error state. */
|
||||
private static final TransitionInstruction
|
||||
transitionTable[6 /* number of STATEs */]
|
||||
[11 /* number of CC classes */]
|
||||
transitionTable[/* 6 is the number of STATEs */][/* 11 is the number of CC classes */]
|
||||
= {
|
||||
// STATE_START:
|
||||
{
|
||||
|
@ -50,7 +53,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
null,
|
||||
/* upon seeing CC_0F39 in this state: */
|
||||
null,
|
||||
/* upon seeing CC_ACHUNG in this state: */
|
||||
/* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
|
||||
null,
|
||||
/* upon seeing CC_DIGIT in this state: */
|
||||
new TransitionInstruction(STATE_DIGIT,
|
||||
|
@ -73,7 +76,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
new TransitionInstruction(STATE_STACKING,
|
||||
ACTION_BEGINS_NEW_GRAPHEME_CLUSTER),
|
||||
/* upon seeing CC_V in this state: */
|
||||
null
|
||||
null,
|
||||
/* upon seeing CC_0F8A in this state: */
|
||||
new TransitionInstruction(STATE_PARTIALMARK,
|
||||
ACTION_BEGINS_NEW_GRAPHEME_CLUSTER),
|
||||
|
@ -82,7 +85,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
ACTION_CONTINUES_GRAPHEME_CLUSTER),
|
||||
/* upon seeing CC_0F39 in this state: */
|
||||
null,
|
||||
/* upon seeing CC_ACHUNG in this state: */
|
||||
/* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
|
||||
null, // because 0F71 comes after SJCs, before Vs, and
|
||||
// before CMs.
|
||||
/* upon seeing CC_DIGIT in this state: */
|
||||
|
@ -115,7 +118,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
ACTION_CONTINUES_GRAPHEME_CLUSTER),
|
||||
/* upon seeing CC_0F39 in this state: */
|
||||
null,
|
||||
/* upon seeing CC_ACHUNG in this state: */
|
||||
/* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
|
||||
null,
|
||||
/* upon seeing CC_DIGIT in this state: */
|
||||
new TransitionInstruction(STATE_DIGIT,
|
||||
|
@ -149,7 +152,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
/* upon seeing CC_0F39 in this state: */
|
||||
new TransitionInstruction(STATE_STACKING,
|
||||
ACTION_CONTINUES_GRAPHEME_CLUSTER),
|
||||
/* upon seeing CC_ACHUNG in this state: */
|
||||
/* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
|
||||
new TransitionInstruction(STATE_STACKPLUSACHUNG,
|
||||
ACTION_CONTINUES_GRAPHEME_CLUSTER),
|
||||
/* upon seeing CC_DIGIT in this state: */
|
||||
|
@ -182,7 +185,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
ACTION_CONTINUES_GRAPHEME_CLUSTER),
|
||||
/* upon seeing CC_0F39 in this state: */
|
||||
null,
|
||||
/* upon seeing CC_ACHUNG in this state: */
|
||||
/* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
|
||||
null,
|
||||
/* upon seeing CC_DIGIT in this state: */
|
||||
new TransitionInstruction(STATE_DIGIT,
|
||||
|
@ -209,48 +212,48 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
ACTION_CONTINUES_GRAPHEME_CLUSTER),
|
||||
/* upon seeing CC_0F39 in this state: */
|
||||
null,
|
||||
/* upon seeing CC_ACHUNG in this state: */
|
||||
/* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
|
||||
null,
|
||||
/* upon seeing CC_DIGIT in this state: */
|
||||
null
|
||||
}
|
||||
};
|
||||
|
||||
DLC NOW -- clearly, we need LegalSyllable to be convertable to and from GraphemeClusters;
|
||||
/* DLC NOW FIXME -- clearly, we need LegalTshegBar to be convertible to and from UnicodeGraphemeClusters; */
|
||||
|
||||
/** Breaks a sequence of GraphemeClusters into LegalSyllables.
|
||||
@param grcls a sequence of nonnull GraphemeClusters
|
||||
@return a sequence of nonnull LegalSyllables
|
||||
/** Breaks a sequence of UnicodeGraphemeClusters into LegalTshegBars.
|
||||
@param grcls a sequence of nonnull UnicodeGraphemeClusters
|
||||
@return a sequence of nonnull LegalTshegBars
|
||||
@exception TibetanSyntaxException if grcls does not consist
|
||||
entirely of legal Tibetan syllables
|
||||
@see #GraphemeCluster
|
||||
@see #LegalSyllable
|
||||
@see UnicodeGraphemeCluster
|
||||
@see LegalTshegBar
|
||||
*/
|
||||
private static Vector breakGraphemeClustersIntoOnlySyllables(Vector grcls)
|
||||
private static Vector breakGraphemeClustersIntoOnlyTshegBars(Vector grcls)
|
||||
throws TibetanSyntaxException
|
||||
{
|
||||
return breakGraphemeClustersIntoSyllablesAndGraphemeClusters(grcls,
|
||||
return breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(grcls,
|
||||
true);
|
||||
}
|
||||
|
||||
private static Vector breakGraphemeClustersIntoOnlySyllables(Vector grcls) {
|
||||
private static Vector breakLegalGraphemeClustersIntoOnlyTshegBars(Vector grcls) {
|
||||
try {
|
||||
return breakGraphemeClustersIntoSyllablesAndGraphemeClusters(grcls,
|
||||
return breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(grcls,
|
||||
false);
|
||||
} catch (TibetanSyntaxException) {
|
||||
} catch (TibetanSyntaxException ex) {
|
||||
throw new Error("This can never happen, because the second parameter, validating, was false.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@param grcls a Vector consisting entirely of GraphemeClusters
|
||||
@param grcls a Vector consisting entirely of UnicodeGraphemeClusters
|
||||
@param validate true iff you wish to have a
|
||||
TibetanSyntaxException thrown upon encountering a sequence of
|
||||
GraphemeClusters that is syntactically incorrect Tibetan
|
||||
UnicodeGraphemeClusters that is syntactically incorrect Tibetan
|
||||
@return if validate is true, a Vector consisting entirely of
|
||||
LegalSyllables, else a vector of LegalSyllables and
|
||||
GraphemeClusters */
|
||||
private static Vector breakGraphemeClustersIntoSyllablesAndGraphemeClusters(Vector grcls,
|
||||
LegalTshegBars, else a vector of LegalTshegBars and
|
||||
UnicodeGraphemeClusters */
|
||||
private static Vector breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(Vector grcls,
|
||||
boolean validate)
|
||||
throws TibetanSyntaxException
|
||||
{
|
||||
|
@ -258,8 +261,8 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
int grcls_len = grcls.length();
|
||||
int beginning_of_cluster = 0;
|
||||
for (int i = 0; i < grcls_len; i++) {
|
||||
GraphemeCluster current_grcl
|
||||
= (GraphemeCluster)grcls.elementAt(i);
|
||||
UnicodeGraphemeCluster current_grcl
|
||||
= (UnicodeGraphemeCluster)grcls.elementAt(i);
|
||||
if (current_grcl.isTshegLike()) {
|
||||
if (beginning_of_cluster < i) {
|
||||
// One or more non-tsheg-like grapheme clusters is
|
||||
|
@ -269,7 +272,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
beginning_of_cluster,
|
||||
i))
|
||||
{
|
||||
syllables.add(new LegalSyllable(grcls,
|
||||
syllables.add(new LegalTshegBar(grcls,
|
||||
beginning_of_cluster,
|
||||
i, tsheg=current_grcl));
|
||||
}
|
||||
|
@ -299,17 +302,17 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
}
|
||||
|
||||
/** Breaks a string of perfectly-formed Unicode into
|
||||
GraphemeClusters.
|
||||
UnicodeGraphemeClusters.
|
||||
@param nfthdl_unicode a String of NFTHDL-normalized Unicode
|
||||
codepoints
|
||||
@exception Exception if the input is not perfectly formed
|
||||
@return a vector of GraphemeClusters
|
||||
@see #GraphemeCluster
|
||||
@return a vector of UnicodeGraphemeClusters
|
||||
@see UnicodeGraphemeCluster
|
||||
*/
|
||||
private static Vector nonErrorCorrectingReader(String nfthdl_unicode)
|
||||
throws Exception
|
||||
{
|
||||
// a vector of GraphemeClusters that we build up little by
|
||||
// a vector of UnicodeGraphemeClusters that we build up little by
|
||||
// little:
|
||||
Vector grcls = new Vector();
|
||||
int currentState = STATE_START;
|
||||
|
@ -326,7 +329,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
|
|||
} else {
|
||||
switch (ti.getAction()) {
|
||||
case ACTION_BEGINS_NEW_GRAPHEME_CLUSTER:
|
||||
grcls.add(new GraphemeCluster(holdingPen));
|
||||
grcls.add(new UnicodeGraphemeCluster(holdingPen));
|
||||
holdingPen = new StringBuffer();
|
||||
break;
|
||||
case ACTION_CONTINUES_GRAPHEME_CLUSTER:
|
||||
|
|
|
@ -19,12 +19,12 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.util;
|
||||
import java.io.*;
|
||||
|
||||
/** Used by {@link LinkedList} to provide the implementation of a
|
||||
/** Used by {@link SimplifiedLinkedList} to provide the implementation of a
|
||||
simple dynamic linked list.
|
||||
|
||||
@author Andrés Montano Pellegrini
|
||||
@see LinkedList
|
||||
@see ListIterator
|
||||
@see SimplifiedLinkedList
|
||||
@see SimplifiedListIterator
|
||||
*/
|
||||
|
||||
public class Link
|
||||
|
@ -111,4 +111,4 @@ public class Link
|
|||
else siguiente.insertSorted(link);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue