ant private-javadocs runs without warnings; cleaned up some as-yet-unused code.
This commit is contained in:
dchandler 2003-04-13 01:46:20 +00:00
parent 644c0d3801
commit 6636d03a41
9 changed files with 158 additions and 145 deletions

View file

@ -18,73 +18,22 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.tshegbar;
/** Constants and static routines (DLC still?) useful in writing state
* machines for transforming Unicode input into other forms.
/** Constants useful in writing state machines for transforming
* Unicode input into other forms.
*
* @see TransitionInstruction#getCCForCP(char)
*
* @author David Chandler
*/
interface UnicodeReadingStateMachineConstants {
/** Returns the codepoint class for cp, e.g. {@link #CC_SJC}.
 * Codepoints that are classified individually are matched first,
 * then the consonant and digit ranges are tested, and anything
 * left over is reported as {@link #CC_SIN}.
 * @param cp a Unicode codepoint, which MUST be nondecomposable
 * if it is in the Tibetan range but can be from outside the
 * Tibetan range of Unicode
 * @return one of the CC_... codepoint class constants */
static int getCCForCP(char cp) {
    // Enforces the precondition stated above when assertions are
    // enabled; getNFTHDL is defined elsewhere and is presumably null
    // exactly for nondecomposable codepoints -- confirm there.
    assert(getNFTHDL(cp) == null);

    // None of the codepoints listed in this switch lies inside the
    // ranges tested after it, so matching them first classifies every
    // codepoint exactly as a single top-to-bottom chain would.
    switch (cp) {
    // Codepoints that each have a class of their own.
    case '\u0F82':
        return CC_0F82;
    case '\u0F8A':
        return CC_0F8A;
    case '\u0F39':
        return CC_0F39;
    case '\u0F71':
        return CC_ACHUNG;

    /* DLC NOW do these combine ONLY with digits, or do CC_CM just NOT combine with digits? */
    case '\u0F3E':
    case '\u0F3F':
    case '\u0F18':
    case '\u0F19':
        return CC_MCWD;

    // Combining marks.  U+0F82 is deliberately absent: it is
    // classified as CC_0F82 above, so testing for it again in this
    // group could never succeed.
    case '\u0FC6':
    case '\u0F87':
    case '\u0F86':
    case '\u0F84':
    case '\u0F83':
    case '\u0F7F':
    case '\u0F7E':
    case '\u0F37': /* DLC NOW NORMALIZATION OF 0F10, 11 to 0F0F ??? */
    case '\u0F35':
        return CC_CM;

    // Simple vowels.
    // DLC what about U+0F84 ??? CC_V or CC_CM ?
    case '\u0F72':
    case '\u0F74':
    case '\u0F7A':
    case '\u0F7B':
    case '\u0F7C':
    case '\u0F7D':
    case '\u0F80':
        return CC_V;

    default:
        break;
    }

    // Noncombining consonants; U+0F48 must never reach this method.
    if ('\u0F40' <= cp && cp <= '\u0F6A') {
        assert(cp != '\u0F48');
        return CC_CON;
    }
    // Combining consonants; U+0F98 must never reach this method.
    if ('\u0F90' <= cp && cp <= '\u0FBC') {
        assert(cp != '\u0F98');
        return CC_SJC;
    }
    // Digits.
    if ('\u0F20' <= cp && cp <= '\u0F33') {
        return CC_DIGIT;
    }
    // Everything else, whether inside or outside the Tibetan range.
    return CC_SIN;
}
// codepoint classes (CC_...) follow. These are mutually
// Codepoint classes (CC_...) follow. These are mutually
// exclusive, and their union is the whole of Unicode.
/** for everything else, i.e. non-Tibetan characters like U+0E00
* and also Tibetan characters like U+0FCF and U+0F05 (DLC rename
* SIN[GLETON] to OTHER as combining marks from outside the
* Tibetan range count as this) but not U+0F8A */
/** for everything else, that is to say non-Tibetan characters
* like U+0E00 and also Tibetan characters like U+0FCF and U+0F05
* (DLC rename SIN[GLETON] to OTHER as combining marks from
* outside the Tibetan range count as this) but not U+0F8A */
static final int CC_SIN = 0;
/** for combining marks in the Tibetan range of Unicode that
@ -95,18 +44,18 @@ interface UnicodeReadingStateMachineConstants {
* CC_MCWD, U+0F82, and U+0F39 */
static final int CC_CM = 2;
/** for combining consonants, i.e. U+0F90-U+0FBC minus U+0F98
* minus the decomposable entries like U+0F93, U+0F9D, U+0FA2,
* etc. */
/** for combining consonants, that is to say U+0F90-U+0FBC minus
* U+0F98 minus the decomposable entries like U+0F93, U+0F9D,
* U+0FA2, etc. */
static final int CC_SJC = 3;
/** for noncombining consonants, i.e. U+0F40-U+0F6A minus U+0F48
* minus the decomposable entries like U+0F43, U+0F4D, U+0F52,
* etc. */
/** for noncombining consonants, that is to say U+0F40-U+0F6A
* minus U+0F48 minus the decomposable entries like U+0F43,
* U+0F4D, U+0F52, etc. */
static final int CC_CON = 4;
/** for simple, nondecomposable vowels, i.e. U+0F72, U+0F74,
* U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */
/** for simple, nondecomposable vowels, that is to say U+0F72,
* U+0F74, U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */
static final int CC_V = 5;
/** for U+0F8A */
@ -121,9 +70,9 @@ interface UnicodeReadingStateMachineConstants {
static final int CC_0F39 = 8;
/** for U+0F71 */
static final int CC_ACHUNG = 9;
static final int CC_SUBSCRIBED_ACHUNG = 9;
/** for digits, i.e. U+0F20-U+0F33 */
/** for digits, that is to say U+0F20-U+0F33 */
static final int CC_DIGIT = 10;
@ -133,14 +82,14 @@ interface UnicodeReadingStateMachineConstants {
/** initial state */
static final int STATE_START = 0;
/** ready state, i.e. the state in which some non-empty Unicode
* String is in the holding area, <i>ready</i> to receive
/** ready state, that is to say the state in which some non-empty
* Unicode String is in the holding area, <i>ready</i> to receive
* combining marks like U+0F35 */
static final int STATE_READY = 1;
/** digit state, i.e. the state in which some non-empty Unicode
* String consisting entirely of digits is in the holding area,
* ready to receive marks that combine only with digits */
/** digit state, that is to say the state in which some non-empty
* Unicode String consisting entirely of digits is in the holding
* area, ready to receive marks that combine only with digits */
static final int STATE_DIGIT = 2;
/** state in which CC_SJC are welcomed and treated as consonants