diff --git a/source/org/thdl/tib/text/THDLWylieConstants.java b/source/org/thdl/tib/text/THDLWylieConstants.java index 29a6a52..a3983fe 100644 --- a/source/org/thdl/tib/text/THDLWylieConstants.java +++ b/source/org/thdl/tib/text/THDLWylieConstants.java @@ -19,7 +19,7 @@ Contributor(s): ______________________________________. package org.thdl.tib.text; /** This is where basic, static knowledge of THDL's Extended Wylie is housed. - * @see org.thdl.tib.text#TibetanMachineWeb */ + * @see TibetanMachineWeb */ public interface THDLWylieConstants { /** * the Wylie for bindu/anusvara diff --git a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java index 658c631..41bfd2d 100644 --- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java +++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java @@ -146,7 +146,7 @@ public final class LegalTshegBar * must not be absent. To learn about the arguments, and to be * sure that your input won't cause an exception to be thrown, * see {@link - * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}. + * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}. * * @exception IllegalArgumentException if the rootLetter is not * one of the thirty consonants (and represented nominally, at @@ -712,7 +712,7 @@ public final class LegalTshegBar * @exception IllegalArgumentException if the syllable does not * follow the rules of a Tibetan syllable. To learn about the * arguments, see {@link - * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}. */ + * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}. */ private static void throwIfNotLegalTshegBar(char prefix, char headLetter, char rootLetter, @@ -745,7 +745,7 @@ public final class LegalTshegBar /** If you get through this gauntlet without having an exception * thrown, then this combination makes a legal Tibetan syllable. * To learn about the arguments, see {@link - * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}. + * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}. * @param errorBuf if non-null, the reason this is illegal will * be written here, if this is illegal * @return true if this syllable is legal, false if this syllable @@ -1257,13 +1257,13 @@ public final class LegalTshegBar * combination of superscribed, root, and subscribed letters) * takes an EWC_ga prefix. * @param head the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the superscribed letter, or EW_ABSENT if * not present * @param root the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the root letter - * @param sub the {@link isNominalRepresentationOfConsonant(char) + * @param sub the {@link #isNominalRepresentationOfConsonant(char) * nominal representation} of the subjoined letter, or EW_ABSENT * if not present */ static boolean takesGao(char head, char root, char sub) { @@ -1289,13 +1289,13 @@ public final class LegalTshegBar * combination of superscribed, root, and subscribed letters) * takes an EWC_da prefix. * @param head the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the superscribed letter, or EW_ABSENT if * not present * @param root the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the root letter - * @param sub the {@link isNominalRepresentationOfConsonant(char) + * @param sub the {@link #isNominalRepresentationOfConsonant(char) * nominal representation} of the subjoined letter, or EW_ABSENT * if not present */ static boolean takesDao(char head, char root, char sub) { @@ -1327,13 +1327,13 @@ public final class LegalTshegBar * combination of superscribed, root, and subscribed letters) * takes an EWC_achung prefix. * @param head the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the superscribed letter, or EW_ABSENT if * not present * @param root the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the root letter - * @param sub the {@link isNominalRepresentationOfConsonant(char) + * @param sub the {@link #isNominalRepresentationOfConsonant(char) * nominal representation} of the subjoined letter, or EW_ABSENT * if not present */ static boolean takesAchungPrefix(char head, char root, char sub) { @@ -1370,13 +1370,13 @@ public final class LegalTshegBar * combination of superscribed, root, and subscribed letters) * takes an EWC_ma prefix. * @param head the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the superscribed letter, or EW_ABSENT if * not present * @param root the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the root letter - * @param sub the {@link isNominalRepresentationOfConsonant(char) + * @param sub the {@link #isNominalRepresentationOfConsonant(char) * nominal representation} of the subjoined letter, or EW_ABSENT * if not present */ static boolean takesMao(char head, char root, char sub) { @@ -1409,13 +1409,13 @@ public final class LegalTshegBar * combination of superscribed, root, and subscribed letters) * takes an EWC_ba prefix. * @param head the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the superscribed letter, or EW_ABSENT if * not present * @param root the {@link - * isNominalRepresentationOfConsonant(char) nominal + * #isNominalRepresentationOfConsonant(char) nominal * representation} of the root letter - * @param sub the {@link isNominalRepresentationOfConsonant(char) + * @param sub the {@link #isNominalRepresentationOfConsonant(char) * nominal representation} of the subjoined letter, or EW_ABSENT * if not present */ static boolean takesBao(char head, char root, char sub) { diff --git a/source/org/thdl/tib/text/tshegbar/TibetanSyntaxException.java b/source/org/thdl/tib/text/tshegbar/TibetanSyntaxException.java index db7561b..0a40dad 100644 --- a/source/org/thdl/tib/text/tshegbar/TibetanSyntaxException.java +++ b/source/org/thdl/tib/text/tshegbar/TibetanSyntaxException.java @@ -18,6 +18,8 @@ Contributor(s): ______________________________________. package org.thdl.tib.text.tshegbar; +import java.util.Vector; + /** DLC FIXMEDOC: says "this isn't legal Tibetan", not "this isn't a valid sequence of Unicode" */ class TibetanSyntaxException extends Exception { /** This constructor creates an exception with a less than helpful @@ -46,6 +48,6 @@ class TibetanSyntaxException extends Exception { GraphemeCluster in the syntactically incorrect stretch of Tibetan. */ TibetanSyntaxException(Vector grcls, int start, int end) { - DLC NOW; + throw new Error("DLC NOW"); } } diff --git a/source/org/thdl/tib/text/tshegbar/TransitionInstruction.java b/source/org/thdl/tib/text/tshegbar/TransitionInstruction.java index 5da8fe4..8169e11 100644 --- a/source/org/thdl/tib/text/tshegbar/TransitionInstruction.java +++ b/source/org/thdl/tib/text/tshegbar/TransitionInstruction.java @@ -18,6 +18,8 @@ Contributor(s): ______________________________________. package org.thdl.tib.text.tshegbar; +import org.thdl.util.ThdlDebug; + /** DLC FIXMEDOC */ class TransitionInstruction implements UnicodeReadingStateMachineConstants { @@ -25,19 +27,19 @@ class TransitionInstruction implements UnicodeReadingStateMachineConstants { TransitionInstruction(int nextState, int action) { super(); - assert(action == ACTION_CONTINUES_GRAPHEME_CLUSTER - || action == ACTION_BEGINS_NEW_GRAPHEME_CLUSTER - || action == ACTION_PREPEND_WITH_0F68); + ThdlDebug.verify(action == ACTION_CONTINUES_GRAPHEME_CLUSTER + || action == ACTION_BEGINS_NEW_GRAPHEME_CLUSTER + || action == ACTION_PREPEND_WITH_0F68); // DLC FIXME: assert this. - assert(nextState == STATE_START - || nextState == STATE_READY - || nextState == STATE_DIGIT - || nextState == STATE_STACKING - || nextState == STATE_STACKPLUSACHUNG - || nextState == STATE_PARTIALMARK); + ThdlDebug.verify(nextState == STATE_START + || nextState == STATE_READY + || nextState == STATE_DIGIT + || nextState == STATE_STACKING + || nextState == STATE_STACKPLUSACHUNG + || nextState == STATE_PARTIALMARK); // DLC FIXME: assert this. // we start in the start state, but we can never return to it. - assert(nextState != STATE_START); + ThdlDebug.verify(nextState != STATE_START); // DLC FIXME: assert this. this.nextState = nextState; this.action = action; @@ -55,4 +57,60 @@ class TransitionInstruction implements UnicodeReadingStateMachineConstants { int getAction() { return action; } int getNextState() { return nextState; } + + + /** Returns the codepoint class for cp, e.g. {@link + * UnicodeReadingStateMachineConstants#CC_SJC}. + * @param cp a Unicode codepoint, which MUST be nondecomposable + * if it is in the Tibetan range but can be from outside the + * Tibetan range of Unicode */ + static int getCCForCP(char cp) { + ThdlDebug.verify(getNFTHDL(cp) == null); // DLC FIXME: assert this + if ('\u0F82' == cp) { + return CC_0F82; + } else if ('\u0F8A' == cp) { + return CC_0F8A; + } else if ('\u0F39' == cp) { + return CC_0F39; + } else if ('\u0F71' == cp) { + return CC_SUBSCRIBED_ACHUNG; + } else if ('\u0F40' <= cp && cp <= '\u0F6A') { + ThdlDebug.verify(cp != '\u0F48'); // DLC FIXME: assert this + return CC_CON; + } else if ('\u0F90' <= cp && cp <= '\u0FBC') { + ThdlDebug.verify(cp != '\u0F98'); // DLC FIXME: assert this + return CC_SJC; + } else if ('\u0F20' <= cp && cp <= '\u0F33') { + return CC_DIGIT; + } else if (/* DLC NOW do these combine ONLY with digits, or do CC_CM just NOT combine with digits? */ + '\u0F3E' == cp + || '\u0F3F' == cp + || '\u0F18' == cp + || '\u0F19' == cp) { + return CC_MCWD; + } else if ('\u0FC6' == cp + || '\u0F87' == cp + || '\u0F86' == cp + || '\u0F84' == cp + || '\u0F83' == cp + || '\u0F82' == cp + || '\u0F7F' == cp + || '\u0F7E' == cp + || '\u0F37' == cp /* DLC NOW NORMALIZATION OF 0F10, 11 to 0F0F ??? */ + || '\u0F35' == cp) { + return CC_CM; + } else if ('\u0F72' == cp + || '\u0F74' == cp + || '\u0F7A' == cp + || '\u0F7B' == cp + || '\u0F7C' == cp + || '\u0F7D' == cp + || '\u0F80' == cp) { + // DLC what about U+0F84 ??? CC_V or CC_CM ? + return CC_V; + } else { + return CC_SIN; + } + } + } diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeGraphemeCluster.java b/source/org/thdl/tib/text/tshegbar/UnicodeGraphemeCluster.java index d8f8ade..ad298fc 100644 --- a/source/org/thdl/tib/text/tshegbar/UnicodeGraphemeCluster.java +++ b/source/org/thdl/tib/text/tshegbar/UnicodeGraphemeCluster.java @@ -387,7 +387,7 @@ public class UnicodeGraphemeCluster throw new Error("DLC FIXME"); } - // DLC NOW -- LegalSyllable doesn't handle digits w/ underlining, etc. + // DLC NOW -- LegalTshegBar doesn't handle digits w/ underlining, etc. /** If this is a Tibetan consonant stack, this returns the root * letter. If this is a Tibetan digit (perhaps with other diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeReader.java b/source/org/thdl/tib/text/tshegbar/UnicodeReader.java index afc57e0..ab21b3e 100644 --- a/source/org/thdl/tib/text/tshegbar/UnicodeReader.java +++ b/source/org/thdl/tib/text/tshegbar/UnicodeReader.java @@ -32,6 +32,7 @@ import java.io.InputStream; public class UnicodeReader { /** You cannot instantiate this class. */ private UnicodeReader() { } + // DLC NOW // public static TTBIR parsePerfectUnicode() { // } diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeReadingStateMachineConstants.java b/source/org/thdl/tib/text/tshegbar/UnicodeReadingStateMachineConstants.java index e4a7418..25e78f3 100644 --- a/source/org/thdl/tib/text/tshegbar/UnicodeReadingStateMachineConstants.java +++ b/source/org/thdl/tib/text/tshegbar/UnicodeReadingStateMachineConstants.java @@ -18,73 +18,22 @@ Contributor(s): ______________________________________. package org.thdl.tib.text.tshegbar; -/** Constants and static routines (DLC still?) useful in writing state - * machines for transforming Unicode input into other forms. +/** Constants useful in writing state machines for transforming + * Unicode input into other forms. + * + * @see TransitionInstruction#getCCForCP(char) * * @author David Chandler */ interface UnicodeReadingStateMachineConstants { - /** Returns the codepoint class for cp, e.g. {@link #CC_SJC}. - * @param cp a Unicode codepoint, which MUST be nondecomposable - * if it is in the Tibetan range but can be from outside the - * Tibetan range of Unicode */ - static int getCCForCP(char cp) { - assert(getNFTHDL(cp) == null); - if ('\u0F82' == cp) { - return CC_0F82; - } else if ('\u0F8A' == cp) { - return CC_0F8A; - } else if ('\u0F39' == cp) { - return CC_0F39; - } else if ('\u0F71' == cp) { - return CC_ACHUNG; - } else if ('\u0F40' <= cp && cp <= '\u0F6A') { - assert(cp != '\u0F48'); - return CC_CON; - } else if ('\u0F90' <= cp && cp <= '\u0FBC') { - assert(cp != '\u0F98'); - return CC_SJC; - } else if ('\u0F20' <= cp && cp <= '\u0F33') { - return CC_DIGIT; - } else if (/* DLC NOW do these combine ONLY with digits, or do CC_CM just NOT combine with digits? */ - '\u0F3E' == cp - || '\u0F3F' == cp - || '\u0F18' == cp - || '\u0F19' == cp) { - return CC_MCWD; - } else if ('\u0FC6' == cp - || '\u0F87' == cp - || '\u0F86' == cp - || '\u0F84' == cp - || '\u0F83' == cp - || '\u0F82' == cp - || '\u0F7F' == cp - || '\u0F7E' == cp - || '\u0F37' == cp /* DLC NOW NORMALIZATION OF 0F10, 11 to 0F0F ??? */ - || '\u0F35' == cp) { - return CC_CM; - } else if ('\u0F72' == cp - || '\u0F74' == cp - || '\u0F7A' == cp - || '\u0F7B' == cp - || '\u0F7C' == cp - || '\u0F7D' == cp - || '\u0F80' == cp) { - // DLC what about U+0F84 ??? CC_V or CC_CM ? - return CC_V; - } else { - return CC_SIN; - } - } - - // codepoint classes (CC_...) follow. These are mutually + // Codepoint classes (CC_...) follow. These are mutually // exclusive, and their union is the whole of Unicode. - /** for everything else, i.e. non-Tibetan characters like U+0E00 - * and also Tibetan characters like U+0FCF and U+0F05 (DLC rename - * SIN[GLETON] to OTHER as combining marks from outside the - * Tibetan range count as this) but not U+0F8A */ + /** for everything else, that is to say non-Tibetan characters + * like U+0E00 and also Tibetan characters like U+0FCF and U+0F05 + * (DLC rename SIN[GLETON] to OTHER as combining marks from + * outside the Tibetan range count as this) but not U+0F8A */ static final int CC_SIN = 0; /** for combining marks in the Tibetan range of Unicode that @@ -95,18 +44,18 @@ interface UnicodeReadingStateMachineConstants { * CC_MCWD, U+0F82, and U+0F39 */ static final int CC_CM = 2; - /** for combining consonants, i.e. U+0F90-U+0FBC minus U+0F98 - * minus the decomposable entries like U+0F93, U+0F9D, U+0FA2, - * etc. */ + /** for combining consonants, that is to say U+0F90-U+0FBC minus + * U+0F98 minus the decomposable entries like U+0F93, U+0F9D, + * U+0FA2, etc. */ static final int CC_SJC = 3; - /** for noncombining consonants, i.e. U+0F40-U+0F6A minus U+0F48 - * minus the decomposable entries like U+0F43, U+0F4D, U+0F52, - * etc. */ + /** for noncombining consonants, that is to say U+0F40-U+0F6A + * minus U+0F48 minus the decomposable entries like U+0F43, + * U+0F4D, U+0F52, etc. */ static final int CC_CON = 4; - /** for simple, nondecomposable vowels, i.e. U+0F72, U+0F74, - * U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */ + /** for simple, nondecomposable vowels, that is to say U+0F72, + * U+0F74, U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */ static final int CC_V = 5; /** for U+0F8A */ @@ -121,9 +70,9 @@ interface UnicodeReadingStateMachineConstants { static final int CC_0F39 = 8; /** for U+0F71 */ - static final int CC_ACHUNG = 9; + static final int CC_SUBSCRIBED_ACHUNG = 9; - /** for digits, i.e. U+0F20-U+0F33 */ + /** for digits, that is to say U+0F20-U+0F33 */ static final int CC_DIGIT = 10; @@ -133,14 +82,14 @@ interface UnicodeReadingStateMachineConstants { /** initial state */ static final int STATE_START = 0; - /** ready state, i.e. the state in which some non-empty Unicode - * String is in the holding area, ready to receive + /** ready state, that is to say the state in which some non-empty + * Unicode String is in the holding area, ready to receive * combining marks like U+0F35 */ static final int STATE_READY = 1; - /** digit state, i.e. the state in which some non-empty Unicode - * String consisting entirely of digits is in the holding area, - * ready to receive marks that combine only with digits */ + /** digit state, that is to say the state in which some non-empty + * Unicode String consisting entirely of digits is in the holding + * area, ready to receive marks that combine only with digits */ static final int STATE_DIGIT = 2; /** state in which CC_SJC are welcomed and treated as consonants diff --git a/source/org/thdl/tib/text/tshegbar/ValidatingUnicodeReader.java b/source/org/thdl/tib/text/tshegbar/ValidatingUnicodeReader.java index 4084444..e3ad84e 100644 --- a/source/org/thdl/tib/text/tshegbar/ValidatingUnicodeReader.java +++ b/source/org/thdl/tib/text/tshegbar/ValidatingUnicodeReader.java @@ -18,14 +18,17 @@ Contributor(s): ______________________________________. package org.thdl.tib.text.tshegbar; +import java.util.Vector; + class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { /** Don't instantiate this class. */ - private Foo() { super(); } + private ValidatingUnicodeReader() { super(); } - /** This table tells how to transition from state a 6 states + error state */ + /** This table tells how to transition from state to state upon + * encountering certain classes of Unicode codepoints. There are + * 6 legal states + an error state. */ private static final TransitionInstruction - transitionTable[6 /* number of STATEs */] - [11 /* number of CC classes */] + transitionTable[/* 6 is the number of STATEs */][/* 11 is the number of CC classes */] = { // STATE_START: { @@ -50,7 +53,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { null, /* upon seeing CC_0F39 in this state: */ null, - /* upon seeing CC_ACHUNG in this state: */ + /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */ null, /* upon seeing CC_DIGIT in this state: */ new TransitionInstruction(STATE_DIGIT, @@ -73,7 +76,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { new TransitionInstruction(STATE_STACKING, ACTION_BEGINS_NEW_GRAPHEME_CLUSTER), /* upon seeing CC_V in this state: */ - null + null, /* upon seeing CC_0F8A in this state: */ new TransitionInstruction(STATE_PARTIALMARK, ACTION_BEGINS_NEW_GRAPHEME_CLUSTER), @@ -82,7 +85,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { ACTION_CONTINUES_GRAPHEME_CLUSTER), /* upon seeing CC_0F39 in this state: */ null, - /* upon seeing CC_ACHUNG in this state: */ + /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */ null, // because 0F71 comes after SJCs, before Vs, and // before CMs. /* upon seeing CC_DIGIT in this state: */ @@ -115,7 +118,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { ACTION_CONTINUES_GRAPHEME_CLUSTER), /* upon seeing CC_0F39 in this state: */ null, - /* upon seeing CC_ACHUNG in this state: */ + /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */ null, /* upon seeing CC_DIGIT in this state: */ new TransitionInstruction(STATE_DIGIT, @@ -149,7 +152,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { /* upon seeing CC_0F39 in this state: */ new TransitionInstruction(STATE_STACKING, ACTION_CONTINUES_GRAPHEME_CLUSTER), - /* upon seeing CC_ACHUNG in this state: */ + /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */ new TransitionInstruction(STATE_STACKPLUSACHUNG, ACTION_CONTINUES_GRAPHEME_CLUSTER), /* upon seeing CC_DIGIT in this state: */ @@ -182,7 +185,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { ACTION_CONTINUES_GRAPHEME_CLUSTER), /* upon seeing CC_0F39 in this state: */ null, - /* upon seeing CC_ACHUNG in this state: */ + /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */ null, /* upon seeing CC_DIGIT in this state: */ new TransitionInstruction(STATE_DIGIT, @@ -209,48 +212,48 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { ACTION_CONTINUES_GRAPHEME_CLUSTER), /* upon seeing CC_0F39 in this state: */ null, - /* upon seeing CC_ACHUNG in this state: */ + /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */ null, /* upon seeing CC_DIGIT in this state: */ null } }; - DLC NOW -- clearly, we need LegalSyllable to be convertable to and from GraphemeClusters; + /* DLC NOW FIXME -- clearly, we need LegalTshegBar to be convertable to and from UnicodeGraphemeClusters; */ - /** Breaks a sequence of GraphemeClusters into LegalSyllables. - @param grcls a sequence of nonnull GraphemeClusters - @return a sequence of nonnull LegalSyllables + /** Breaks a sequence of UnicodeGraphemeClusters into LegalTshegBars. + @param grcls a sequence of nonnull UnicodeGraphemeClusters + @return a sequence of nonnull LegalTshegBars @exception TibetanSyntaxException if grcls does not consist entirely of legal Tibetan syllables - @see #GraphemeCluster - @see #LegalSyllable + @see UnicodeGraphemeCluster + @see LegalTshegBar */ - private static Vector breakGraphemeClustersIntoOnlySyllables(Vector grcls) + private static Vector breakGraphemeClustersIntoOnlyTshegBars(Vector grcls) throws TibetanSyntaxException { - return breakGraphemeClustersIntoSyllablesAndGraphemeClusters(grcls, + return breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(grcls, true); } - private static Vector breakGraphemeClustersIntoOnlySyllables(Vector grcls) { + private static Vector breakLegalGraphemeClustersIntoOnlyTshegBars(Vector grcls) { try { - return breakGraphemeClustersIntoSyllablesAndGraphemeClusters(grcls, + return breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(grcls, false); - } catch (TibetanSyntaxException) { + } catch (TibetanSyntaxException ex) { throw new Error("This can never happen, because the second parameter, validating, was false."); } } /** - @param grcls a Vector consisting entirely of GraphemeClusters + @param grcls a Vector consisting entirely of UnicodeGraphemeClusters @param validate true iff you wish to have a TibetanSyntaxException thrown upon encountering a sequence of - GraphemeClusters that is syntactically incorrect Tibetan + UnicodeGraphemeClusters that is syntactically incorrect Tibetan @return if validate is true, a Vector consisting entirely of - LegalSyllables, else a vector of LegalSyllables and - GraphemeClusters */ - private static Vector breakGraphemeClustersIntoSyllablesAndGraphemeClusters(Vector grcls, + LegalTshegBars, else a vector of LegalTshegBars and + UnicodeGraphemeClusters */ + private static Vector breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(Vector grcls, boolean validate) throws TibetanSyntaxException { @@ -258,8 +261,8 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { int grcls_len = grcls.length(); int beginning_of_cluster = 0; for (int i = 0; i < grcls_len; i++) { - GraphemeCluster current_grcl - = (GraphemeCluster)grcls.elementAt(i); + UnicodeGraphemeCluster current_grcl + = (UnicodeGraphemeCluster)grcls.elementAt(i); if (current_grcl.isTshegLike()) { if (beginning_of_cluster < i) { // One or more non-tsheg-like grapheme clusters is @@ -269,7 +272,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { beginning_of_cluster, i)) { - syllables.add(new LegalSyllable(grcls, + syllables.add(new LegalTshegBar(grcls, beginning_of_cluster, i, tsheg=current_grcl)); } @@ -299,17 +302,17 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { } /** Breaks a string of perfectly-formed Unicode into - GraphemeClusters. + UnicodeGraphemeClusters. @param nfthdl_unicode a String of NFTHDL-normalized Unicode codepoints @exception Exception if the input is not perfectly formed - @return a vector of GraphemeClusters - @see #GraphemeCluster + @return a vector of UnicodeGraphemeClusters + @see UnicodeGraphemeCluster */ private static Vector nonErrorCorrectingReader(String nfthdl_unicode) throws Exception { - // a vector of GraphemeClusters that we build up little by + // a vector of UnicodeGraphemeClusters that we build up little by // little: Vector grcls = new Vector(); int currentState = STATE_START; @@ -326,7 +329,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { } else { switch (ti.getAction()) { case ACTION_BEGINS_NEW_GRAPHEME_CLUSTER: - grcls.add(new GraphemeCluster(holdingPen)); + grcls.add(new UnicodeGraphemeCluster(holdingPen)); holdingPen = new StringBuffer(); break; case ACTION_CONTINUES_GRAPHEME_CLUSTER: diff --git a/source/org/thdl/util/Link.java b/source/org/thdl/util/Link.java index 29a3124..04f5d53 100644 --- a/source/org/thdl/util/Link.java +++ b/source/org/thdl/util/Link.java @@ -19,12 +19,12 @@ Contributor(s): ______________________________________. package org.thdl.util; import java.io.*; -/** Used by {@link LinkedList} to provide the implementation of a +/** Used by {@link SimplifiedLinkedList} to provide the implementation of a simple dynamic link list. @author Andrés Montano Pellegrini - @see LinkedList - @see ListIterator + @see SimplifiedLinkedList + @see SimplifiedListIterator */ public class Link @@ -111,4 +111,4 @@ public class Link else siguiente.insertSorted(link); } -} \ No newline at end of file +}