Two things:

First, TMW->EWTS gives dbas and dngas instead of dabs and dangs because Chris Fynn's e-mail from today has dbas and dngas. Second, Down with ACIPRules. Long live ACIPTraits. EWTS->Tibetan conversion is closer still.
2005-02-22 04:36:54 +00:00 · 2005-02-22 04:36:54 +00:00 · c16f633ecf
commit c16f633ecf
parent 82c6047cc2
18 changed files with 950 additions and 818 deletions
--- a/source/org/thdl/tib/text/ttt/TTraits.java
+++ b/source/org/thdl/tib/text/ttt/TTraits.java
@@ -18,12 +18,18 @@ Contributor(s): ______________________________________.

 package org.thdl.tib.text.ttt;

+import java.util.ArrayList;
+import org.thdl.tib.text.DuffCode;
+
 /** A TTraits object encapsulates all the things that make a
- *  particular Roman transliteration scheme unique.  If both EWTS and
- *  ACIP transliterations have a property in common, then it's likely
- *  encoded in a manner that's hard to modify.  But if they differ in
- *  some respect, then that difference should be encoded in a TTraits
- *  object.
+ *  particular Roman transliteration scheme unique.  For the most
+ *  part, this difference is expressed at the finest granularity
+ *  possible -- often single characters of Roman transliteration.
+ *
+ *  <p>If both EWTS and ACIP transliterations have a property in
+ *  common, then it's likely encoded in a manner that's hard to
+ *  modify.  But if they differ in some respect, then that difference
+ *  should be encoded in a TTraits object.
 *
 *  <p>It is very likely that classes that implement this interface
 *  will choose to use the design pattern 'singleton'. */
@@ -62,9 +68,63 @@ interface TTraits {
    /** Returns true if and only if <em>s</em> is a stretch of
     *  transliteration corresponding to a Tibetan wowel (without any
     *  [achen or other] consonant) */
-    boolean isWowel(String s);
+    boolean isWowel(String s); // TODO(DLC)[EWTS->Tibetan]: what about "m:" as opposed to "m" or ":"

    /** Returns true if and only if the pair given has a simple error
     *  other than being a mere disambiguator. */
    boolean hasSimpleError(TPair p);
+
+    /** The implicit 'ahhh' vowel, the one you see when you write the
+        human-friendly transliteration for "\u0f40\u0f0b". */
+    String aVowel();
+
+    /** Returns true if s is a valid postsuffix.  s must not have a
+        wowel on it. */
+    boolean isPostsuffix(String s);
+
+    /** Returns true if and only if l is the representation of a
+        letter that can be a suffix.  Note that all postsuffixes are
+        also suffixes.  l should not have a wowel. */
+    boolean isSuffix(String l);
+
+    /** Returns true if and only if l is the representation of a
+        letter that can be a prefix.  l should not have a wowel. */
+    boolean isPrefix(String l);
+
+    /** Returns the EWTS transliteration corresponding to the
+     *  consonant l, which should not have a vowel.  Returns null if
+     *  there is no such EWTS.
+     *
+     *  <p>May return "W" instead of "w", "r" instead of "R", and "y"
+     *  instead of "Y" because we sometimes don't have enough context
+     *  to decide.
+     *
+     *  <p>The reasoning for "W" instead of "w" is that r-w and r+w
+     *  are both known hash keys (as {@link
+     *  org.thdl.tib.text.TibetanMachineWeb} would call them).  We
+     *  sort 'em out this way.  (They are the only things like this
+     *  according to bug report #800166.) */
+    String getEwtsForConsonant(String l);
+
+    /** Returns the EWTS corresponding to the given punctuation or
+     *  mark.  Returns null if there is no such EWTS. */
+    String getEwtsForOther(String l);
+
+    /** Returns the EWTS corresponding to the given "wowel".  Returns
+     *  null if there is no such EWTS. */
+    String getEwtsForWowel(String l);
+
+    /** If l is a consonant or vowel or punctuation mark, then this
+     *  returns the Unicode for it.  The Unicode for the subscribed
+     *  form of the glyph is returned if subscribed is true.  Returns
+     *  null if l is unknown. */
+    String getUnicodeFor(String l, boolean subscribed);
+
+    /** Returns a scanner that can break up a string of
+        transliteration. */
+    TTshegBarScanner scanner();
+
+    /** Gets the duffcodes for wowel, such that they look good with
+     *  the preceding glyph, and appends them to duff. */
+    void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel);
 }