Two things:

One, TMW->EWTS gives dbas and dngas instead of dabs and dangs
because Chris Fynn's e-mail from today has dbas and dngas.

Second, Down with ACIPRules.  Long live ACIPTraits.  EWTS->Tibetan
conversion is closer still.
This commit is contained in:
dchandler 2005-02-22 04:36:54 +00:00
parent 82c6047cc2
commit c16f633ecf
18 changed files with 950 additions and 818 deletions

View file

@ -18,12 +18,18 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.util.ArrayList;
import org.thdl.tib.text.DuffCode;
/** A TTraits object encapsulates all the things that make a
* particular Roman transliteration scheme unique. If both EWTS and
* ACIP transliterations have a property in common, then it's likely
* encoded in a manner that's hard to modify. But if they differ in
* some respect, then that difference should be encoded in a TTraits
* object.
* particular Roman transliteration scheme unique. For the most
* part, this difference is expressed at the finest granularity
* possible -- often single characters of Roman transliteration.
*
* <p>If both EWTS and ACIP transliterations have a property in
* common, then it's likely encoded in a manner that's hard to
* modify. But if they differ in some respect, then that difference
* should be encoded in a TTraits object.
*
* <p>It is very likely that classes that implement this interface
* will choose to use the design pattern 'singleton'. */
@ -62,9 +68,63 @@ interface TTraits {
/** Returns true if and only if <em>s</em> is a stretch of
* transliteration corresponding to a Tibetan wowel (without any
* [achen or other] consonant) */
boolean isWowel(String s);
boolean isWowel(String s); // TODO(DLC)[EWTS->Tibetan]: what about "m:" as opposed to "m" or ":"
/** Returns true if and only if the pair given has a simple error
* other than being a mere disambiguator. */
boolean hasSimpleError(TPair p);
/** The implicit 'ahhh' vowel, the one you see when you write the
human-friendly transliteration for "\u0f40\u0f0b". */
String aVowel();
/** Returns true if s is a valid postsuffix. s must not have a
wowel on it. */
boolean isPostsuffix(String s);
/** Returns true if and only if l is the representation of a
letter that can be a suffix. Note that all postsuffixes are
also suffixes. l should not have a wowel. */
boolean isSuffix(String l);
/** Returns true if and only if l is the representation of a
letter that can be a prefix. l should not have a wowel. */
boolean isPrefix(String l);
/** Returns the EWTS transliteration corresponding to the
* consonant l, which should not have a vowel. Returns null if
* there is no such EWTS.
*
* <p>May return "W" instead of "w", "r" instead of "R", and "y"
* instead of "Y" because we sometimes don't have enough context
* to decide.
*
* <p>The reasoning for "W" instead of "w" is that r-w and r+w
* are both known hash keys (as {@link
* org.thdl.tib.text#TibetanMachineWeb} would call them). We
* sort 'em out this way. (They are the only things like this
* according to bug report #800166.) */
String getEwtsForConsonant(String l);
/** Returns the EWTS corresponding to the given punctuation or
* mark. Returns null if there is no such EWTS. */
String getEwtsForOther(String l);
/** Returns the EWTS corresponding to the given "wowel". Returns
* null if there is no such EWTS. */
String getEwtsForWowel(String l);
/** If l is a consonant or vowel or punctuation mark, then this
* returns the Unicode for it. The Unicode for the subscribed
* form of the glyph is returned if subscribed is true. Returns
* null if l is unknown. */
String getUnicodeFor(String l, boolean subscribed);
/** Returns a scanner that can break up a string of
transliteration. */
TTshegBarScanner scanner();
/** Gets the duffcodes for wowel, such that they look good with
* the preceding glyph, and appends them to duff. */
void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel);
}