ant private-javadocs runs without warnings; cleaned up some

as-yet-unused code.
2003-04-13 01:46:20 +00:00 · 2003-04-13 01:46:20 +00:00 · 6636d03a41
commit 6636d03a41
parent 644c0d3801
9 changed files with 158 additions and 145 deletions
--- a/source/org/thdl/tib/text/tshegbar/UnicodeReadingStateMachineConstants.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeReadingStateMachineConstants.java
@ -18,73 +18,22 @@ Contributor(s): ______________________________________.

 package org.thdl.tib.text.tshegbar;

-/** Constants and static routines (DLC still?) useful in writing state
- *  machines for transforming Unicode input into other forms.
+/** Constants useful in writing state machines for transforming
+ *  Unicode input into other forms.
+ *
+ *  @see TransitionInstruction#getCCForCP(char)
 *
 *  @author David Chandler
 */
 interface UnicodeReadingStateMachineConstants {

-    /** Returns the codepoint class for cp, e.g. {@link #CC_SJC}.
-     *  @param cp a Unicode codepoint, which MUST be nondecomposable
-     *  if it is in the Tibetan range but can be from outside the
-     *  Tibetan range of Unicode */
-    static int getCCForCP(char cp) {
-        assert(getNFTHDL(cp) == null);
-        if ('\u0F82' == cp) {
-            return CC_0F82;
-        } else if ('\u0F8A' == cp) {
-            return CC_0F8A;
-        } else if ('\u0F39' == cp) {
-            return CC_0F39;
-        } else if ('\u0F71' == cp) {
-            return CC_ACHUNG;
-        } else if ('\u0F40' <= cp && cp <= '\u0F6A') {
-            assert(cp != '\u0F48');
-            return CC_CON;
-        } else if ('\u0F90' <= cp && cp <= '\u0FBC') {
-            assert(cp != '\u0F98');
-            return CC_SJC;
-        } else if ('\u0F20' <= cp && cp <= '\u0F33') {
-            return CC_DIGIT;
-        } else if (/* DLC NOW do these combine ONLY with digits, or do CC_CM just NOT combine with digits? */
-                   '\u0F3E' == cp
-                   || '\u0F3F' == cp
-                   || '\u0F18' == cp
-                   || '\u0F19' == cp) {
-            return CC_MCWD;
-        } else if ('\u0FC6' == cp
-                   || '\u0F87' == cp
-                   || '\u0F86' == cp
-                   || '\u0F84' == cp
-                   || '\u0F83' == cp
-                   || '\u0F82' == cp
-                   || '\u0F7F' == cp
-                   || '\u0F7E' == cp
-                   || '\u0F37' == cp /* DLC NOW NORMALIZATION OF 0F10, 11 to 0F0F ??? */
-                   || '\u0F35' == cp) {
-            return CC_CM;
-        } else if ('\u0F72' == cp
-                   || '\u0F74' == cp
-                   || '\u0F7A' == cp
-                   || '\u0F7B' == cp
-                   || '\u0F7C' == cp
-                   || '\u0F7D' == cp
-                   || '\u0F80' == cp) {
-            // DLC what about U+0F84 ??? CC_V or CC_CM ?
-            return CC_V;
-        } else {
-            return CC_SIN;
-        }
-    }
-
-    // codepoint classes (CC_...) follow.  These are mutually
+    // Codepoint classes (CC_...) follow.  These are mutually
    // exclusive, and their union is the whole of Unicode.

-    /** for everything else, i.e. non-Tibetan characters like U+0E00
-     *  and also Tibetan characters like U+0FCF and U+0F05 (DLC rename
-     *  SIN[GLETON] to OTHER as combining marks from outside the
-     *  Tibetan range count as this) but not U+0F8A */
+    /** for everything else, that is to say non-Tibetan characters
+     *  like U+0E00 and also Tibetan characters like U+0FCF and U+0F05
+     *  (DLC rename SIN[GLETON] to OTHER as combining marks from
+     *  outside the Tibetan range count as this) but not U+0F8A */
    static final int CC_SIN = 0;

    /** for combining marks in the Tibetan range of Unicode that
@ -95,18 +44,18 @@ interface UnicodeReadingStateMachineConstants {
     *  CC_MCWD, U+0F82, and U+0F39 */
    static final int CC_CM = 2;

-    /** for combining consonants, i.e. U+0F90-U+0FBC minus U+0F98
-     *  minus the decomposable entries like U+0F93, U+0F9D, U+0FA2,
-     *  etc. */
+    /** for combining consonants, that is to say U+0F90-U+0FBC minus
+     *  U+0F98 minus the decomposable entries like U+0F93, U+0F9D,
+     *  U+0FA2, etc. */
    static final int CC_SJC = 3;

-    /** for noncombining consonants, i.e. U+0F40-U+0F6A minus U+0F48
-     *  minus the decomposable entries like U+0F43, U+0F4D, U+0F52,
-     *  etc. */
+    /** for noncombining consonants, that is to say U+0F40-U+0F6A
+     *  minus U+0F48 minus the decomposable entries like U+0F43,
+     *  U+0F4D, U+0F52, etc. */
    static final int CC_CON = 4;

-    /** for simple, nondecomposable vowels, i.e. U+0F72, U+0F74,
-     *  U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */
+    /** for simple, nondecomposable vowels, that is to say U+0F72,
+     *  U+0F74, U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */
    static final int CC_V = 5;

    /** for U+0F8A */
@ -121,9 +70,9 @@ interface UnicodeReadingStateMachineConstants {
    static final int CC_0F39 = 8;

    /** for U+0F71 */
-    static final int CC_ACHUNG = 9;
+    static final int CC_SUBSCRIBED_ACHUNG = 9;

-    /** for digits, i.e. U+0F20-U+0F33 */
+    /** for digits, that is to say U+0F20-U+0F33 */
    static final int CC_DIGIT = 10;


@ -133,14 +82,14 @@ interface UnicodeReadingStateMachineConstants {
    /** initial state */
    static final int STATE_START = 0;

-    /** ready state, i.e. the state in which some non-empty Unicode
-     *  String is in the holding area, <i>ready</i> to receive
+    /** ready state, that is to say the state in which some non-empty
+     *  Unicode String is in the holding area, <i>ready</i> to receive
     *  combining marks like U+0F35 */
    static final int STATE_READY = 1;

-    /** digit state, i.e. the state in which some non-empty Unicode
-     *  String consisting entirely of digits is in the holding area,
-     *  ready to receive marks that combine only with digits */
+    /** digit state, that is to say the state in which some non-empty
+     *  Unicode String consisting entirely of digits is in the holding
+     *  area, ready to receive marks that combine only with digits */
    static final int STATE_DIGIT = 2;

    /** state in which CC_SJC are welcomed and treated as consonants