Fixed ACIP->TMW conversion of vowels such as 'I.
Fixed ACIP->Unicode/TMW conversion of BDE, which should be treated as B-DE, not B+DE, because the former is legal Tibetan. The ACIP->EWTS subroutine has been improved. TMW->Wylie and TMW->ACIP now behave better in error cases. TMW->ACIP now embeds friendly error messages in its output.
This commit is contained in:
parent
16817d0b8e
commit
115d0e0e6c
14 changed files with 689 additions and 472 deletions
|
@ -168,7 +168,7 @@ public final class DuffCode {
|
|||
* recursion (manifesting as a StackOverflowError)) */
|
||||
public String toString(boolean TMW) {
|
||||
boolean[] err = new boolean[] { false };
|
||||
return "<duffcode font="
|
||||
return "<glyph font="
|
||||
+ (TMW
|
||||
? TibetanMachineWeb.tmwFontNames
|
||||
: TibetanMachineWeb.tmFontNames)[fontNum]
|
||||
|
|
|
@ -73,8 +73,14 @@ public class TGCPair {
|
|||
vowelWylie = null;
|
||||
}
|
||||
/** Returns the Wylie for this pair; equivalent to calling
 *  getWylie(false). */
public String getWylie() {
|
||||
return getWylie(false);
|
||||
}
|
||||
public String getWylie(boolean appendaged) {
|
||||
StringBuffer b = new StringBuffer();
|
||||
if (consonantWylie != null) {
|
||||
if (appendaged && !"'".equals(consonantWylie))
|
||||
b.append("a"); // pa'am... we want 'am, not 'm; 'ang, not 'ng.
|
||||
|
||||
// we may have {p-y}, but the user wants to see {py}.
|
||||
for (int i = 0; i < consonantWylie.length(); i++) {
|
||||
char ch = consonantWylie.charAt(i);
|
||||
|
@ -87,26 +93,35 @@ public class TGCPair {
|
|||
return b.toString();
|
||||
}
|
||||
/** Returns the ACIP for this pair; equivalent to calling
 *  getACIP(false). */
public String getACIP() {
|
||||
return getACIP(false);
|
||||
}
|
||||
public String getACIP(boolean appendaged) {
|
||||
// DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
|
||||
StringBuffer b = new StringBuffer();
|
||||
if (consonantWylie != null) {
|
||||
String consonantACIP // DLC FIXME can KAsh occur?
|
||||
= org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie);
|
||||
if (null == consonantACIP) throw new Error("how?");
|
||||
// System.out.println("DLC: Wylie=" + consonantWylie + ", ACIP=" + consonantACIP);
|
||||
// we may have {P-Y}, but the user wants to see {PY}.
|
||||
for (int i = 0; i < consonantACIP.length(); i++) {
|
||||
char ch = consonantACIP.charAt(i);
|
||||
if ('-' != ch)
|
||||
b.append(ch);
|
||||
String consonantACIP
|
||||
= org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(consonantWylie);
|
||||
if (null == consonantACIP) {
|
||||
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie);
|
||||
} else {
|
||||
if (appendaged && !"'".equals(consonantWylie))
|
||||
b.append("A"); // PA'AM
|
||||
// we may have {P-Y}, but the user wants to see {PY}.
|
||||
for (int i = 0; i < consonantACIP.length(); i++) {
|
||||
char ch = consonantACIP.charAt(i);
|
||||
if ('-' != ch)
|
||||
b.append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (vowelWylie != null) {
|
||||
String vowelACIP // DLC FIXME look for exceptions
|
||||
= org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie);
|
||||
// System.out.println("DLC: Wylie=" + vowelWylie + ", ACIP=" + vowelACIP);
|
||||
if (null == vowelACIP) throw new Error("how?");
|
||||
b.append(vowelACIP);
|
||||
String vowelACIP
|
||||
= org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(vowelWylie);
|
||||
if (null == vowelACIP) {
|
||||
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + vowelWylie);
|
||||
} else {
|
||||
b.append(vowelACIP);
|
||||
}
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
|
@ -150,6 +165,12 @@ public class TGCPair {
|
|||
}
|
||||
|
||||
this.consonantWylie = consonantWylie;
|
||||
if (null != vowelWylie) {
|
||||
if (vowelWylie.equals("iA") || vowelWylie.equals("Ai"))
|
||||
vowelWylie = "I";
|
||||
if (vowelWylie.equals("uA") || vowelWylie.equals("Au"))
|
||||
vowelWylie = "U";
|
||||
}
|
||||
this.vowelWylie = vowelWylie;
|
||||
this.classification = realClassification;
|
||||
}
|
||||
|
|
|
@ -360,7 +360,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
else
|
||||
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
|
||||
|
||||
glyphs.addAll(getBindu(dc));
|
||||
getBindu(glyphs, dc);
|
||||
}
|
||||
|
||||
else {
|
||||
|
@ -477,303 +477,366 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gets the bindu sequence for a given context.
|
||||
* In the TibetanMachineWeb fonts, bindu (anusvara) is realized
|
||||
* differently depending on which vowel it attaches to. Although
|
||||
* the default bindu glyph is affixed to consonants and subscript vowels,
|
||||
* for superscript vowels (i, e, o, etc), there is a single glyph
|
||||
* which merges the bindu and that vowel together. When you pass this
|
||||
* method a glyph context, it will return a List of glyphs which
|
||||
* will either consist of the original glyph followed by the default
|
||||
* bindu glyph, or a composite vowel+bindu glyph.
|
||||
* Note that there is only one glyph in the context. This means that
|
||||
* bindus will not affix properly if superscript vowels are allowed to directly
|
||||
* precede subscript vowels (e.g. pou).
|
||||
* @param dc the DuffCode of the glyph you
|
||||
* want to attach a bindu to
|
||||
* @return a List of DuffCode glyphs that include the
|
||||
* original dc, as well as a bindu
|
||||
*/
|
||||
public static List getBindu(DuffCode dc) {
|
||||
List bindus = new ArrayList();
|
||||
|
||||
if (null == dc) {
|
||||
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
|
||||
return bindus;
|
||||
}
|
||||
|
||||
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
|
||||
bindus.add(dc);
|
||||
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
|
||||
return bindus;
|
||||
}
|
||||
|
||||
bindus.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
|
||||
return bindus;
|
||||
}
|
||||
* Gets the bindu sequence for a given context. In the
|
||||
* TibetanMachineWeb fonts, bindu (anusvara) is realized differently
|
||||
* depending on which vowel it attaches to. Although the default bindu
|
||||
* glyph is affixed to consonants and subscript vowels, for superscript
|
||||
* vowels (i, e, o, etc), there is a single glyph which merges the
|
||||
* bindu and that vowel together. When you pass this method a glyph
|
||||
* context and a list, it will append to that list glyphs which will either consist
|
||||
* of the original glyph followed by the default bindu glyph, or a
|
||||
* composite vowel+bindu glyph. Note that there is only one glyph in
|
||||
* the context. This means that bindus will not affix properly if
|
||||
* superscript vowels are allowed to directly precede subscript vowels
|
||||
* (e.g. pou).
|
||||
* @param list a List of DuffCode glyphs to which will be appended the
|
||||
* original dc (if non-null) as well as a bindu, or the one glyph that
|
||||
* represents both
|
||||
* @param dc the DuffCode of the glyph you want to attach a bindu to,
|
||||
* or null */
|
||||
public static void getBindu(List list, DuffCode dc) {
|
||||
// No context glyph was given: append only the default bindu glyph.
if (null == dc) {
|
||||
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
|
||||
} else {
|
||||
// dc has no entry in the bindu map: append dc itself, then the
// default bindu glyph.
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
|
||||
list.add(dc);
|
||||
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
|
||||
} else {
|
||||
// The bindu map holds a replacement glyph for dc: append that
// single glyph instead of dc plus a separate bindu.
list.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the vowel sequence for a given vowel in a given context.
|
||||
* Given a context, this method affixes a vowel and returns the
|
||||
* context plus the vowel. Generally, it is enough to provide just
|
||||
* one glyph for context.
|
||||
* Gets the vowel sequence for a given vowel in a given context. Given
|
||||
* a context, this method affixes a vowel and returns the context (iff
|
||||
* context_added[0] is false) plus the vowel. Generally, it is enough
|
||||
* to provide just one glyph for context.
|
||||
* @param context the glyph preceding the vowel you want to affix
|
||||
* @param vowel the vowel you want to affix, in Wylie
|
||||
* @param context_added an array of one boolean, an input/output
|
||||
* parameter that, if true, means that only the vowel will be added to
|
||||
* l, not the context, and if false, means that the context and the
|
||||
* vowel will be added and that context_added[0] will be updated to be
|
||||
* true
|
||||
* @return a List of glyphs equal to the vowel in context
|
||||
*/
|
||||
* @throws IllegalArgumentException if the given combination is not
|
||||
* supported */
|
||||
public static void getVowel(List l, DuffCode context, String vowel, boolean context_added[]) {
|
||||
// Delegate to the two-context overload, passing null for the glyph two
// positions back; context_added is forwarded unchanged.
getVowel(l, null, context, vowel, context_added);
|
||||
}
|
||||
/** Wrapper that calls for adding context to l. */
|
||||
public static void getVowel(List l, DuffCode context, String vowel) {
|
||||
getVowel(l, null, context, vowel);
|
||||
getVowel(l, null, context, vowel, new boolean[] { false });
|
||||
}
|
||||
/** Wrapper that calls for adding context to l. */
|
||||
public static void getVowel(List l, DuffCode context_1, DuffCode context_2, String vowel) {
|
||||
// A fresh one-element flag array holding false is passed as
// context_added, i.e. the context has not yet been added to l.
getVowel(l, context_1, context_2, vowel, new boolean[] { false });
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the vowel sequence for a given vowel in a given context and
|
||||
* appends it to l. Given a context, this method affixes a vowel and
|
||||
* appends the context plus the vowel to l. Since the choice of vowel
|
||||
* glyph depends on the consonant to which it is attached, generally it
|
||||
* is enough to provide just the immediately preceding
|
||||
* context. However, in some cases, double vowels are allowed - for
|
||||
* example 'buo'. To find the correct glyph for 'o', we need 'b' in
|
||||
* this case, not 'u'. Note also that some Extended Wylie vowels
|
||||
* correspond to multiple glyphs in TibetanMachineWeb. For example, the
|
||||
* vowel I consists of both an achung and a reverse gigu. All required
|
||||
* glyphs are appended to l.
|
||||
* appends the context (iff context_added[0] is false) plus the vowel
|
||||
* to l. Since the choice of vowel glyph depends on the consonant to
|
||||
* which it is attached, generally it is enough to provide just the
|
||||
* immediately preceding context. However, in some cases, double vowels
|
||||
* are allowed - for example 'buo'. To find the correct glyph for 'o',
|
||||
* we need 'b' in this case, not 'u'. Note also that some Extended
|
||||
* Wylie vowels correspond to multiple glyphs in TibetanMachineWeb. For
|
||||
* example, the vowel I consists of both an achung and a reverse
|
||||
* gigu. All required glyphs are appended to l.
|
||||
* @param context_1 the glyph occurring two glyphs before the vowel you
|
||||
* want to affix
|
||||
* @param context_2 the glyph immediately before the vowel you want to
|
||||
* affix
|
||||
* @param vowel the vowel you want to affix, in Wylie */
|
||||
* @param vowel the vowel you want to affix, in Wylie
|
||||
* @param context_added an array of one boolean, an input/output
|
||||
* parameter that, if true, means that only the vowel will be added to
|
||||
* l, not the context, and if false, means that the context and the
|
||||
* vowel will be added and that context_added[0] will be updated to be
|
||||
* true
|
||||
* @throws IllegalArgumentException if the given combination is not
|
||||
* supported */
|
||||
|
||||
public static void getVowel(List l, DuffCode context_1, DuffCode context_2, String vowel) {
|
||||
//this vowel doesn't correspond to a glyph -
|
||||
//so you just return the original context
|
||||
public static void getVowel(List l, DuffCode context_1, DuffCode context_2,
|
||||
String vowel, boolean context_added[])
|
||||
throws IllegalArgumentException
|
||||
{
|
||||
//this vowel doesn't correspond to a glyph -
|
||||
//so you just return the original context
|
||||
|
||||
if ( vowel.equals(WYLIE_aVOWEL) ||
|
||||
TibetanMachineWeb.isTopVowel(context_2)) {
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
if (vowel.equals(WYLIE_aVOWEL)
|
||||
|| TibetanMachineWeb.isTopVowel(context_2)) {
|
||||
if (TibetanMachineWeb.isTopVowel(context_2))
|
||||
throw new IllegalArgumentException("dropping vowels is bad");
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
|
||||
l.add(context_2);
|
||||
return;
|
||||
}
|
||||
l.add(context_2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
//first, the three easiest cases: ai, au, and <i
|
||||
//these vowels have one invariant form - therefore,
|
||||
//dc_context is just returned along with that form
|
||||
//first, the three easiest cases: ai, au, and <i
|
||||
//these vowels have one invariant form - therefore,
|
||||
//dc_context is just returned along with that form
|
||||
|
||||
if (vowel.equals(ai_VOWEL)) {
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
if (vowel.equals(ai_VOWEL)) {
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
|
||||
l.add(context_2);
|
||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL);
|
||||
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||
return;
|
||||
}
|
||||
l.add(context_2);
|
||||
}
|
||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL);
|
||||
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (vowel.equals(au_VOWEL)) {
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
if (vowel.equals(au_VOWEL)) {
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
|
||||
l.add(context_2);
|
||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL);
|
||||
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||
return;
|
||||
}
|
||||
l.add(context_2);
|
||||
}
|
||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL);
|
||||
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (vowel.equals(reverse_i_VOWEL)) {
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
if (vowel.equals(reverse_i_VOWEL)) {
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
|
||||
l.add(context_2);
|
||||
l.add(context_2);
|
||||
}
|
||||
|
||||
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
//second, the vowels i, e, and o
|
||||
//these vowels have many different glyphs each,
|
||||
//whose correct selection depends on the
|
||||
//preceding context. therefore, dc_context is
|
||||
//returned along with the vowel appropriate to
|
||||
//that context
|
||||
|
||||
//second, the vowels i, e, and o
|
||||
//these vowels have many different glyphs each,
|
||||
//whose correct selection depends on the
|
||||
//preceding context. therefore, dc_context is
|
||||
//returned along with the vowel appropriate to
|
||||
//that context
|
||||
if (vowel.equals(i_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
||||
if (null == dc_v && null != context_1) {
|
||||
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
||||
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
||||
}
|
||||
|
||||
if (vowel.equals(i_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
||||
if (null == dc_v && null != context_1) {
|
||||
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
||||
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
||||
}
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
l.add(context_2);
|
||||
}
|
||||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
l.add(context_2);
|
||||
return;
|
||||
}
|
||||
// DLC perfect TMW->Wylie wouldn't produce o'i for an input file containing merely TMW9.61 -- it would produce \u0f7c,\u0f60,\u0f72 -- round-trip shows why.
|
||||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
if (vowel.equals(e_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
|
||||
if (null == dc_v && null != context_1) {
|
||||
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
||||
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
|
||||
if (vowel.equals(e_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
|
||||
if (null == dc_v && null != context_1) {
|
||||
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
||||
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
|
||||
}
|
||||
l.add(context_2);
|
||||
}
|
||||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
return;
|
||||
}
|
||||
|
||||
l.add(context_2);
|
||||
if (vowel.equals(o_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
|
||||
if (null == dc_v && null != context_1) {
|
||||
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
||||
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
|
||||
}
|
||||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
|
||||
return;
|
||||
}
|
||||
l.add(context_2);
|
||||
}
|
||||
|
||||
if (vowel.equals(o_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
|
||||
if (null == dc_v && null != context_1) {
|
||||
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
||||
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
|
||||
}
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
if (context_1 != null)
|
||||
l.add(context_1);
|
||||
return;
|
||||
}
|
||||
|
||||
l.add(context_2);
|
||||
//next come the vowels u, A, and U
|
||||
//these three vowels are grouped together because they all
|
||||
//can cause the preceding context to change. in particular,
|
||||
//both u and A cannot be affixed to ordinary k or g, but
|
||||
//rather the shortened versions of k and g - therefore,
|
||||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
if (vowel.equals(u_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
|
||||
|
||||
return;
|
||||
}
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
|
||||
//next come the vowels u, A, and U
|
||||
//these three vowels are grouped together because they all
|
||||
//can cause the preceding context to change. in particular,
|
||||
//both u and A cannot be affixed to ordinary k or g, but
|
||||
//rather the shortened versions of k and g - therefore,
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
}
|
||||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
if (vowel.equals(u_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
|
||||
return;
|
||||
}
|
||||
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
if (vowel.equals(A_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
}
|
||||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (vowel.equals(A_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||
if (vowel.equals(U_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
|
||||
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
}
|
||||
|
||||
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
if (null != dc_v)
|
||||
return;
|
||||
}
|
||||
|
||||
l.add(dc_v);
|
||||
//finally, the vowels I and <I
|
||||
//these vowels are unique in that they both
|
||||
//require a change from the previous character,
|
||||
//and consist of two glyphs themselves
|
||||
|
||||
return;
|
||||
}
|
||||
if (vowel.equals(I_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
||||
|
||||
if (vowel.equals(U_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
}
|
||||
|
||||
if (null != dc_v_sub && null != dc_v_sup) {
|
||||
l.add(dc_v_sub);
|
||||
l.add(dc_v_sup);
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
return;
|
||||
}
|
||||
|
||||
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
|
||||
l.add(dc_v);
|
||||
if (vowel.equals(reverse_I_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
|
||||
|
||||
return;
|
||||
}
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
|
||||
//finally, the vowels I and <I
|
||||
//these vowels are unique in that they both
|
||||
//require a change from the previous character,
|
||||
//and consist of two glyphs themselves
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
}
|
||||
|
||||
if (null != dc_v_sub && null != dc_v_sup) {
|
||||
l.add(dc_v_sub);
|
||||
l.add(dc_v_sup);
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad");
|
||||
|
||||
if (vowel.equals(I_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
||||
return;
|
||||
}
|
||||
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
|
||||
if (null != dc_v_sub && null != dc_v_sup) {
|
||||
l.add(dc_v_sub);
|
||||
l.add(dc_v_sup);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (vowel.equals(reverse_I_VOWEL)) {
|
||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
|
||||
|
||||
if (null != context_1)
|
||||
l.add(context_1);
|
||||
|
||||
if (null == halfHeight)
|
||||
l.add(context_2);
|
||||
else
|
||||
l.add(halfHeight);
|
||||
|
||||
if (null != dc_v_sub && null != dc_v_sup) {
|
||||
l.add(dc_v_sub);
|
||||
l.add(dc_v_sup);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
throw new Error("DLC can this happen? " + vowel);
|
||||
}
|
||||
throw new IllegalArgumentException("bad vowel " + vowel);
|
||||
}
|
||||
|
||||
/**
|
||||
* True if you want TibetanMachineWeb-to-Extended-Wylie conversion
|
||||
|
@ -844,7 +907,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
// DLC FIXME: {H}, U+0F7F, is part of a grapheme cluster!
|
||||
// David Chapman and I both need a comprehensive list of these
|
||||
// guys.
|
||||
// guys. Get it from Unicode 4.0 spec?
|
||||
/** Scans the glyphs in glyphList and creates the returned list of
|
||||
grapheme clusters based on them. A grapheme cluster is a
|
||||
consonant or consonant stack with optional adornment or a
|
||||
|
@ -889,6 +952,11 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
} else if (TibetanMachineWeb.isWylieAdornmentAndContainsVowel(wylie)
|
||||
|| TibetanMachineWeb.isWylieAdornment(wylie)) {
|
||||
buildingUpVowel.append(wylie);
|
||||
// DLC FIXME: I bet three or four vowels together
|
||||
// breaks TMW->ACIP and TMW->EWTS. Test it. When it
|
||||
// does, revamp TGCPair to have a set of vowels. The
|
||||
// output order should be consistent with the
|
||||
// Unicode-imposed order on vowels.
|
||||
} else {
|
||||
// number or weird thing:
|
||||
|
||||
|
@ -1134,12 +1202,6 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
if (isAppendageNonVowelWylie(wylie)) {
|
||||
candidateType
|
||||
= candidateType.substring("maybe-".length()).intern();
|
||||
// So that we get 'am, not 'm; 'ang, not 'ng:
|
||||
|
||||
// FIXME: cludge: weird place to do this.
|
||||
// pa'am, not pa'm is what we want, sure,
|
||||
// but doing this here is ugly.
|
||||
tp.setWylie(WYLIE_aVOWEL + tp.getWylie());
|
||||
} else {
|
||||
if (null != warnings)
|
||||
warnings.append("Found a tsheg bar that has an achung (" + ACHUNG + ") tacked on, followed by some other thing whose wylie is " + wylie + "\n");
|
||||
|
@ -1264,7 +1326,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|
||||
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
|
||||
} else {
|
||||
} else if (i + 1 < sz) {
|
||||
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
|
||||
&& TGCPair.SANSKRIT_WITH_VOWEL != cls)
|
||||
translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
|
||||
|
@ -1277,7 +1339,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
int leftover = sz + 1;
|
||||
|
||||
// Appendaged vs. not appendaged? it affects nothing at
|
||||
// this stage.
|
||||
// this stage except for pa'm vs. pa'am.
|
||||
boolean appendaged = (candidateType.startsWith("appendaged-"));
|
||||
candidateType = getCandidateTypeModuloAppendage(candidateType);
|
||||
|
||||
if ("prefix/root-root/suffix-suffix/postsuffix" == candidateType) {
|
||||
|
@ -1433,7 +1496,9 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
// append the wylie/ACIP left over:
|
||||
for (int i = leftover; i < sz; i++) {
|
||||
TGCPair tp = (TGCPair)gcs.get(i);
|
||||
translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP());
|
||||
translitBuffer.append(EWTSNotACIP
|
||||
? tp.getWylie(appendaged)
|
||||
: tp.getACIP(appendaged));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1468,6 +1533,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
ArrayList glyphList = new ArrayList();
|
||||
StringBuffer translitBuffer = new StringBuffer();
|
||||
|
||||
// DLC FIXME: " " should become " ", and test with ACIP # and *.
|
||||
for (int i=0; i<dcs.length; i++) {
|
||||
char ch = dcs[i].getCharacter();
|
||||
int k = dcs[i].getCharNum();
|
||||
|
@ -1482,6 +1548,14 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
|
||||
}
|
||||
|
||||
// In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
|
||||
// indicates a real line break.
|
||||
if (!EWTSNotACIP && '\n' == ch) {
|
||||
if (i > 0 && dcs[i - 1].getCharacter() == '\r')
|
||||
translitBuffer.append("\r\n");
|
||||
else
|
||||
translitBuffer.append(ch);
|
||||
}
|
||||
translitBuffer.append(ch);
|
||||
} else {
|
||||
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
|
||||
|
|
|
@ -994,6 +994,8 @@ private static boolean isAmbHelper(String y) {
|
|||
* @return true if x + y is ambiguous in the Extended Wylie
|
||||
* transliteration, false if not */
|
||||
public static boolean isAmbiguousWylie(String x, String y) {
|
||||
// DLC NOW: BDE vs. B+DE -- TMW->ACIP should give B+DE to be very friendly to machines.
|
||||
|
||||
// What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
|
||||
// Some would say it doesn't matter, because that's illegal. wa
|
||||
// doesn't take any prefixes. But I want even illegal stuff to
|
||||
|
@ -1719,19 +1721,21 @@ private static String acipForGlyph(String hashKey) {
|
|||
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change
|
||||
* them both when you change this. */
|
||||
private static String getTMWToWylieErrorString(DuffCode dc) {
|
||||
return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode "
|
||||
return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert "
|
||||
+ dc.toString(true)
|
||||
+ " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
|
||||
+ " to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>";
|
||||
}
|
||||
|
||||
/** Error that appears in a document when some TMW cannot be
|
||||
* transcribed in ACIP. This error message is
|
||||
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET), so change
|
||||
* them both when you change this. */
|
||||
// Builds the bracketed TMW->ACIP error text; the caller-supplied
// description `it` is embedded verbatim between the fixed prefix and suffix.
static String getTMWToACIPErrorString(String it) {
|
||||
return "[# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert " + it + " to ACIP. Please transcribe this yourself.]";
|
||||
}
|
||||
|
||||
private static String getTMWToACIPErrorString(DuffCode dc) {
|
||||
return "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode "
|
||||
+ dc.toString(true)
|
||||
+ " to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
|
||||
return getTMWToACIPErrorString(dc.toString(true));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -446,12 +446,18 @@ public class ACIPConverter {
|
|||
if (!lastGuyWasNonPunct
|
||||
|| (null != lastGuy
|
||||
&& (lpl = lastGuy.get(lastGuy.size() - 1)).size() == 1
|
||||
&& lpl.get(0).getLeft().equals("G")
|
||||
&& // it's (G . anything)
|
||||
// followed by some number
|
||||
// of spaces (at least one,
|
||||
// this one) and then a
|
||||
// comma:
|
||||
// "GU ," and "KU ," each have
|
||||
// tshegs, but "GI ," and "KI
|
||||
// ," each have a Tibetan
|
||||
// space.
|
||||
&& ((lpl.get(0).getLeft().equals("G")
|
||||
|| lpl.get(0).getLeft().equals("K"))
|
||||
&& (lpl.get(0).getRight().indexOf('U') < 0))
|
||||
&&
|
||||
// it's (G . anything)
|
||||
// followed by some number of
|
||||
// spaces (at least one, this
|
||||
// one) and then a comma:
|
||||
peekaheadFindsSpacesAndComma(scan, i+1))) {
|
||||
if (null != writer) {
|
||||
unicode = " ";
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
|
|||
import java.util.HashSet;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.List;
|
||||
|
||||
import org.thdl.tib.text.DuffCode;
|
||||
|
@ -30,7 +31,7 @@ import org.thdl.tib.text.TibTextUtils;
|
|||
|
||||
/** Canonizes some facts regarding the ACIP transcription system.
|
||||
* @author David Chandler */
|
||||
class ACIPRules {
|
||||
public class ACIPRules {
|
||||
/** {Ksh}, the longest consonant, has 3 characters, so this is
|
||||
* three. */
|
||||
public static int MAX_CONSONANT_LENGTH = 3;
|
||||
|
@ -66,7 +67,7 @@ class ACIPRules {
|
|||
// DLC I'm on my own with 'O and 'E and 'OO and 'EE, but
|
||||
// GANG'O appears and I wonder... so here they are. It's
|
||||
// consistent with 'I and 'A and 'U, at least: all the vowels
|
||||
// may appear as K'vowel.
|
||||
// may appear as K'vowel. DLC FIXME: ask.
|
||||
|
||||
acipVowels.add(baseVowels[i][0]);
|
||||
acipVowels.add('\'' + baseVowels[i][0]);
|
||||
|
@ -140,6 +141,43 @@ class ACIPRules {
|
|||
return consonants.contains(acip);
|
||||
}
|
||||
|
||||
private static HashMap wylieToACIP = null;
|
||||
/** Returns the ACIP transliteration corresponding to the THDL
|
||||
Extended Wylie <em>atom</em> EWTS, or null if EWTS is not
|
||||
recognized. */
|
||||
public static String getACIPForEWTS(String EWTS) {
|
||||
// The three lookups below are made only for their side effect: the
// table-building code in those methods registers entries through
// putMapping, which also fills wylieToACIP. (Presumably each builds
// its table once, on first call -- TODO confirm; the guards are not
// visible here.)
getWylieForACIPConsonant(null);
|
||||
getWylieForACIPOther(null);
|
||||
getWylieForACIPVowel(null);
|
||||
String ans = (String)wylieToACIP.get(EWTS);
|
||||
// No direct hit: split EWTS at '-' and '+' (the delimiters are
// returned as tokens too) and translate it piece by piece.
// NOTE(review): a null EWTS reaches EWTS.length() here and throws.
if (null == ans) {
|
||||
StringBuffer finalAns = new StringBuffer(EWTS.length());
|
||||
StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
|
||||
while (sTok.hasMoreTokens()) {
|
||||
String part, tok = sTok.nextToken();
|
||||
// Delimiters are copied through unchanged.
if (tok.equals("-") || tok.equals("+"))
|
||||
part = tok;
|
||||
else
|
||||
part = (String)wylieToACIP.get(tok);
|
||||
// One unrecognized piece makes the whole lookup fail.
if (null == part) return null;
|
||||
finalAns.append(part);
|
||||
}
|
||||
return finalAns.toString();
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
/** Registers acip->wylie mappings in toWylie; registers
|
||||
wylie->acip mappings in {@link #wylieToACIP}. */
|
||||
private static void putMapping(HashMap toWylie, String ACIP, String EWTS) {
|
||||
toWylie.put(ACIP, EWTS);
|
||||
// Create the shared reverse (wylie->acip) table on first use and seed
// it with the one entry that has no forward counterpart.
if (null == wylieToACIP) {
|
||||
wylieToACIP = new HashMap(75);
|
||||
wylieToACIP.put("_", " "); // oddball.
|
||||
}
|
||||
// NOTE(review): when two ACIP strings share one EWTS value, the later
// registration replaces the earlier one here -- e.g. this file
// registers both "V" and "W" for "w".
wylieToACIP.put(EWTS, ACIP);
|
||||
}
|
||||
|
||||
private static HashMap acipConsonant2wylie = null;
|
||||
/** Returns the EWTS corresponding to the given ACIP consonant
|
||||
* (without the "A" vowel). Returns null if there is no such
|
||||
|
@ -149,52 +187,52 @@ class ACIPRules {
|
|||
acipConsonant2wylie = new HashMap(37);
|
||||
|
||||
// oddball:
|
||||
acipConsonant2wylie.put("V", "w");
|
||||
putMapping(acipConsonant2wylie, "V", "w");
|
||||
|
||||
// more oddballs:
|
||||
acipConsonant2wylie.put("DH", "d+h");
|
||||
acipConsonant2wylie.put("BH", "b+h");
|
||||
acipConsonant2wylie.put("dH", "D+h");
|
||||
acipConsonant2wylie.put("DZH", "dz+h");
|
||||
acipConsonant2wylie.put("Ksh", "k+Sh");
|
||||
acipConsonant2wylie.put("GH", "g+h");
|
||||
putMapping(acipConsonant2wylie, "DH", "d+h");
|
||||
putMapping(acipConsonant2wylie, "BH", "b+h");
|
||||
putMapping(acipConsonant2wylie, "dH", "D+h");
|
||||
putMapping(acipConsonant2wylie, "DZH", "dz+h");
|
||||
putMapping(acipConsonant2wylie, "Ksh", "k+Sh");
|
||||
putMapping(acipConsonant2wylie, "GH", "g+h");
|
||||
|
||||
|
||||
acipConsonant2wylie.put("K", "k");
|
||||
acipConsonant2wylie.put("KH", "kh");
|
||||
acipConsonant2wylie.put("G", "g");
|
||||
acipConsonant2wylie.put("NG", "ng");
|
||||
acipConsonant2wylie.put("C", "c");
|
||||
acipConsonant2wylie.put("CH", "ch");
|
||||
acipConsonant2wylie.put("J", "j");
|
||||
acipConsonant2wylie.put("NY", "ny");
|
||||
acipConsonant2wylie.put("T", "t");
|
||||
acipConsonant2wylie.put("TH", "th");
|
||||
acipConsonant2wylie.put("D", "d");
|
||||
acipConsonant2wylie.put("N", "n");
|
||||
acipConsonant2wylie.put("P", "p");
|
||||
acipConsonant2wylie.put("PH", "ph");
|
||||
acipConsonant2wylie.put("B", "b");
|
||||
acipConsonant2wylie.put("M", "m");
|
||||
acipConsonant2wylie.put("TZ", "ts");
|
||||
acipConsonant2wylie.put("TS", "tsh");
|
||||
acipConsonant2wylie.put("DZ", "dz");
|
||||
acipConsonant2wylie.put("W", "w");
|
||||
acipConsonant2wylie.put("ZH", "zh");
|
||||
acipConsonant2wylie.put("Z", "z");
|
||||
acipConsonant2wylie.put("'", "'");
|
||||
acipConsonant2wylie.put("Y", "y");
|
||||
acipConsonant2wylie.put("R", "r");
|
||||
acipConsonant2wylie.put("L", "l");
|
||||
acipConsonant2wylie.put("SH", "sh");
|
||||
acipConsonant2wylie.put("S", "s");
|
||||
acipConsonant2wylie.put("H", "h");
|
||||
acipConsonant2wylie.put("A", "a");
|
||||
acipConsonant2wylie.put("t", "T");
|
||||
acipConsonant2wylie.put("th", "Th");
|
||||
acipConsonant2wylie.put("d", "D");
|
||||
acipConsonant2wylie.put("n", "N");
|
||||
acipConsonant2wylie.put("sh", "Sh");
|
||||
putMapping(acipConsonant2wylie, "K", "k");
|
||||
putMapping(acipConsonant2wylie, "KH", "kh");
|
||||
putMapping(acipConsonant2wylie, "G", "g");
|
||||
putMapping(acipConsonant2wylie, "NG", "ng");
|
||||
putMapping(acipConsonant2wylie, "C", "c");
|
||||
putMapping(acipConsonant2wylie, "CH", "ch");
|
||||
putMapping(acipConsonant2wylie, "J", "j");
|
||||
putMapping(acipConsonant2wylie, "NY", "ny");
|
||||
putMapping(acipConsonant2wylie, "T", "t");
|
||||
putMapping(acipConsonant2wylie, "TH", "th");
|
||||
putMapping(acipConsonant2wylie, "D", "d");
|
||||
putMapping(acipConsonant2wylie, "N", "n");
|
||||
putMapping(acipConsonant2wylie, "P", "p");
|
||||
putMapping(acipConsonant2wylie, "PH", "ph");
|
||||
putMapping(acipConsonant2wylie, "B", "b");
|
||||
putMapping(acipConsonant2wylie, "M", "m");
|
||||
putMapping(acipConsonant2wylie, "TZ", "ts");
|
||||
putMapping(acipConsonant2wylie, "TS", "tsh");
|
||||
putMapping(acipConsonant2wylie, "DZ", "dz");
|
||||
putMapping(acipConsonant2wylie, "W", "w");
|
||||
putMapping(acipConsonant2wylie, "ZH", "zh");
|
||||
putMapping(acipConsonant2wylie, "Z", "z");
|
||||
putMapping(acipConsonant2wylie, "'", "'");
|
||||
putMapping(acipConsonant2wylie, "Y", "y");
|
||||
putMapping(acipConsonant2wylie, "R", "r");
|
||||
putMapping(acipConsonant2wylie, "L", "l");
|
||||
putMapping(acipConsonant2wylie, "SH", "sh");
|
||||
putMapping(acipConsonant2wylie, "S", "s");
|
||||
putMapping(acipConsonant2wylie, "H", "h");
|
||||
putMapping(acipConsonant2wylie, "A", "a");
|
||||
putMapping(acipConsonant2wylie, "t", "T");
|
||||
putMapping(acipConsonant2wylie, "th", "Th");
|
||||
putMapping(acipConsonant2wylie, "d", "D");
|
||||
putMapping(acipConsonant2wylie, "n", "N");
|
||||
putMapping(acipConsonant2wylie, "sh", "Sh");
|
||||
}
|
||||
return (String)acipConsonant2wylie.get(acip);
|
||||
}
|
||||
|
@ -207,14 +245,14 @@ class ACIPRules {
|
|||
acipVowel2wylie = new HashMap(baseVowels.length * 4);
|
||||
|
||||
for (int i = 0; i < baseVowels.length; i++) {
|
||||
acipVowel2wylie.put(baseVowels[i][0], baseVowels[i][1]);
|
||||
acipVowel2wylie.put('\'' + baseVowels[i][0], baseVowels[i][2]);
|
||||
acipVowel2wylie.put(baseVowels[i][0] + 'm', baseVowels[i][1] + 'M');
|
||||
acipVowel2wylie.put('\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M');
|
||||
acipVowel2wylie.put(baseVowels[i][0] + ':', baseVowels[i][1] + 'H');
|
||||
acipVowel2wylie.put('\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H');
|
||||
acipVowel2wylie.put(baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
|
||||
acipVowel2wylie.put('\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
|
||||
putMapping(acipVowel2wylie, baseVowels[i][0], baseVowels[i][1]);
|
||||
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0], baseVowels[i][2]);
|
||||
putMapping(acipVowel2wylie, baseVowels[i][0] + 'm', baseVowels[i][1] + 'M');
|
||||
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M');
|
||||
putMapping(acipVowel2wylie, baseVowels[i][0] + ':', baseVowels[i][1] + 'H');
|
||||
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H');
|
||||
putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
|
||||
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
|
||||
}
|
||||
}
|
||||
return (String)acipVowel2wylie.get(acip);
|
||||
|
@ -228,27 +266,27 @@ class ACIPRules {
|
|||
acipOther2wylie = new HashMap(20);
|
||||
|
||||
// DLC FIXME: check all these again.
|
||||
acipOther2wylie.put(",", "/");
|
||||
acipOther2wylie.put(" ", " ");
|
||||
acipOther2wylie.put(".", "*");
|
||||
acipOther2wylie.put("|", "|");
|
||||
acipOther2wylie.put("`", "!");
|
||||
acipOther2wylie.put(";", ";");
|
||||
acipOther2wylie.put("*", "@");
|
||||
acipOther2wylie.put("#", "@#");
|
||||
acipOther2wylie.put("%", "~X");
|
||||
acipOther2wylie.put("&", "&");
|
||||
putMapping(acipOther2wylie, ",", "/");
|
||||
putMapping(acipOther2wylie, " ", " ");
|
||||
putMapping(acipOther2wylie, ".", "*");
|
||||
putMapping(acipOther2wylie, "|", "|");
|
||||
putMapping(acipOther2wylie, "`", "!");
|
||||
putMapping(acipOther2wylie, ";", ";");
|
||||
putMapping(acipOther2wylie, "*", "@");
|
||||
putMapping(acipOther2wylie, "#", "@#");
|
||||
putMapping(acipOther2wylie, "%", "~X");
|
||||
putMapping(acipOther2wylie, "&", "&");
|
||||
|
||||
acipOther2wylie.put("0", "0");
|
||||
acipOther2wylie.put("1", "1");
|
||||
acipOther2wylie.put("2", "2");
|
||||
acipOther2wylie.put("3", "3");
|
||||
acipOther2wylie.put("4", "4");
|
||||
acipOther2wylie.put("5", "5");
|
||||
acipOther2wylie.put("6", "6");
|
||||
acipOther2wylie.put("7", "7");
|
||||
acipOther2wylie.put("8", "8");
|
||||
acipOther2wylie.put("9", "9");
|
||||
putMapping(acipOther2wylie, "0", "0");
|
||||
putMapping(acipOther2wylie, "1", "1");
|
||||
putMapping(acipOther2wylie, "2", "2");
|
||||
putMapping(acipOther2wylie, "3", "3");
|
||||
putMapping(acipOther2wylie, "4", "4");
|
||||
putMapping(acipOther2wylie, "5", "5");
|
||||
putMapping(acipOther2wylie, "6", "6");
|
||||
putMapping(acipOther2wylie, "7", "7");
|
||||
putMapping(acipOther2wylie, "8", "8");
|
||||
putMapping(acipOther2wylie, "9", "9");
|
||||
}
|
||||
return (String)acipOther2wylie.get(acip);
|
||||
}
|
||||
|
@ -465,39 +503,52 @@ class ACIPRules {
|
|||
|
||||
/** Gets the duffcodes for vowel, such that they look good with
|
||||
* the preceding glyph, preceding, and appends them to duff. */
|
||||
static void getDuffForACIPVowel(ArrayList r, DuffCode preceding, String vowel) {
|
||||
static void getDuffForACIPVowel(ArrayList duff, DuffCode preceding, String vowel) {
|
||||
if (null == vowel) return;
|
||||
if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion! Use assert.
|
||||
throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly.");
|
||||
|
||||
// Order matters here.
|
||||
boolean context_added[] = new boolean[] { false };
|
||||
if (vowel.startsWith("A")) {
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.WYLIE_aVOWEL);
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
|
||||
} else if (vowel.indexOf("'U") >= 0) {
|
||||
TibTextUtils.getVowel(r, preceding, "U");
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
|
||||
} else if (vowel.indexOf("'I") >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
|
||||
} else {
|
||||
if (vowel.indexOf('\'') >= 0)
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.A_VOWEL);
|
||||
if (vowel.indexOf("EE") >= 0)
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.ai_VOWEL);
|
||||
else if (vowel.indexOf('E') >= 0)
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.e_VOWEL);
|
||||
if (vowel.indexOf("OO") >= 0)
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.au_VOWEL);
|
||||
else if (vowel.indexOf('O') >= 0)
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.o_VOWEL);
|
||||
if (vowel.indexOf('I') >= 0)
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.i_VOWEL);
|
||||
if (vowel.indexOf('U') >= 0)
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.u_VOWEL);
|
||||
if (vowel.indexOf('i') >= 0)
|
||||
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.reverse_i_VOWEL);
|
||||
if (vowel.indexOf('\'') >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
|
||||
}
|
||||
if (vowel.indexOf("EE") >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
|
||||
} else if (vowel.indexOf('E') >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
|
||||
}
|
||||
if (vowel.indexOf("OO") >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
|
||||
} else if (vowel.indexOf('O') >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
|
||||
}
|
||||
if (vowel.indexOf('I') >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
|
||||
}
|
||||
if (vowel.indexOf('U') >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
|
||||
}
|
||||
if (vowel.indexOf('i') >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
|
||||
}
|
||||
}
|
||||
// DLC FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
|
||||
|
||||
if (vowel.indexOf('m') >= 0)
|
||||
r.add(TibetanMachineWeb.getGlyph("M"));
|
||||
if (vowel.indexOf('m') >= 0) {
|
||||
DuffCode last = (DuffCode)duff.get(duff.size() - 1);
|
||||
duff.remove(duff.size() - 1);
|
||||
TibTextUtils.getBindu(duff, last);
|
||||
}
|
||||
if (vowel.indexOf(':') >= 0)
|
||||
r.add(TibetanMachineWeb.getGlyph("H"));
|
||||
duff.add(TibetanMachineWeb.getGlyph("H"));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
// DLC NOW: KAsh ->Ksh here! optionally!
|
||||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
|
|
|
@ -340,6 +340,22 @@ tstHelper("KA'", "[(K . A), (' . )]",
|
|||
new String[] { },
|
||||
"{G+G}{YE}{S}");
|
||||
|
||||
// DLC FIXME: warn about BDE vs. B+DE. color such differently. Maybe an inputter saw B+DE and typed in BDE, not thinking.
|
||||
tstHelper("BDE", "{B}{DE}",
|
||||
new String[] { "{B}{DE}", "{B+DE}" },
|
||||
new String[] { "{B}{DE}" },
|
||||
"{B}{DE}");
|
||||
|
||||
tstHelper("SHR'I", "{SH}{R'I}",
|
||||
null,
|
||||
null,
|
||||
"{SH+R'I}");
|
||||
|
||||
|
||||
// DLC FIXME: test EWTS {pouM}
|
||||
|
||||
// DLC FIXME: do TMW->ACIP->TMW->ACIP round-trip.
|
||||
|
||||
tstHelper("DRUG", "{D}{RU}{G}",
|
||||
new String[] { "{D}{RU}{G}", "{D+RU}{G}" },
|
||||
new String[] { "{D+RU}{G}" },
|
||||
|
@ -7302,6 +7318,7 @@ tstHelper("ZUR");
|
|||
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
||||
uhelp("*#HUm: K+DHA GRO`;.,",
|
||||
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") K+DHA IS ESSENTIALLY NOTHING.]\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
||||
// DLC FIXME: the file ACIP_SHRI should be made into an ACIP->TMW automated test case
|
||||
}
|
||||
|
||||
/** Tests some more tsheg bars, these from Dr. Lacey's critical
|
||||
|
|
|
@ -197,32 +197,42 @@ class TParseTree {
|
|||
* stack can take every prefix, which is not the case in
|
||||
* reality */
|
||||
public TStackListList getUniqueParse(boolean noPrefixTests) {
|
||||
TStackListList allLegalParses = new TStackListList(2); // save memory
|
||||
// For Sanskrit+Tibetan:
|
||||
TStackListList allNonillegalParses = new TStackListList(2); // save memory
|
||||
// For Tibetan only:
|
||||
TStackListList allStrictlyLegalParses = new TStackListList(2); // save memory
|
||||
|
||||
TStackListList legalParsesWithVowelOnRoot = new TStackListList(1);
|
||||
ParseIterator pi = getParseIterator();
|
||||
while (pi.hasNext()) {
|
||||
TStackList sl = pi.next();
|
||||
BoolPair bpa = sl.isLegalTshegBar(noPrefixTests);
|
||||
if (bpa.isLegal) {
|
||||
if (bpa.isLegalAndHasAVowelOnRoot)
|
||||
BoolTriple bt = sl.isLegalTshegBar(noPrefixTests);
|
||||
if (bt.isLegal) {
|
||||
if (bt.isLegalAndHasAVowelOnRoot)
|
||||
legalParsesWithVowelOnRoot.add(sl);
|
||||
allLegalParses.add(sl);
|
||||
if (!bt.isLegalButSanskrit)
|
||||
allStrictlyLegalParses.add(sl);
|
||||
allNonillegalParses.add(sl);
|
||||
}
|
||||
}
|
||||
if (legalParsesWithVowelOnRoot.size() == 1)
|
||||
return legalParsesWithVowelOnRoot;
|
||||
else {
|
||||
if (allStrictlyLegalParses.size() == 1)
|
||||
return allStrictlyLegalParses;
|
||||
if (allStrictlyLegalParses.size() > 2)
|
||||
throw new Error("can this happen?");
|
||||
if (legalParsesWithVowelOnRoot.size() == 2) {
|
||||
if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size())
|
||||
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1));
|
||||
return new TStackListList(legalParsesWithVowelOnRoot.get(1));
|
||||
}
|
||||
if (allLegalParses.size() == 2) {
|
||||
if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size())
|
||||
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1));
|
||||
return new TStackListList(allLegalParses.get(1));
|
||||
if (allNonillegalParses.size() == 2) {
|
||||
if (allNonillegalParses.get(0).size() != 1 + allNonillegalParses.get(1).size())
|
||||
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allNonillegalParses.get(0) + " ;; " + allNonillegalParses.get(1));
|
||||
return new TStackListList(allNonillegalParses.get(1));
|
||||
}
|
||||
return allLegalParses;
|
||||
return allNonillegalParses;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -121,16 +121,16 @@ class TStackList {
|
|||
* happen. */
|
||||
public ListIterator listIterator() { return al.listIterator(); }
|
||||
|
||||
/** Returns a pair with {@link BoolPair#isLegal} true if and only
|
||||
* if this list of stacks is a legal tsheg bar by the rules of
|
||||
* Tibetan syntax (sometimes called rules of spelling). If this
|
||||
* is legal, then {@link BoolPair#isLegalAndHasAVowelOnRoot} will
|
||||
* be true if and only if there is an explicit {A} vowel on the
|
||||
* root stack.
|
||||
/** Returns a triple with {@link BoolTriple#isLegal} true if and
|
||||
* only if this list of stacks is a legal tsheg bar by the rules
|
||||
* of Tibetan syntax (sometimes called rules of spelling). If
|
||||
* this is legal, then {@link
|
||||
* BoolTriple#isLegalAndHasAVowelOnRoot} will be true if and only
|
||||
* if there is an explicit {A} vowel on the root stack.
|
||||
* @param noPrefixTests true if you want to pretend that every
|
||||
* stack can take every prefix, which is not the case in
|
||||
* reality */
|
||||
public BoolPair isLegalTshegBar(boolean noPrefixTests) {
|
||||
public BoolTriple isLegalTshegBar(boolean noPrefixTests) {
|
||||
// DLC handle PADMA and other Tibetanized Sanskrit fellows consistently. Right now we only treat single-stack Sanskrit guys as legal.
|
||||
|
||||
TTGCList tgcList = new TTGCList(this);
|
||||
|
@ -162,7 +162,9 @@ class TStackList {
|
|||
}
|
||||
}
|
||||
}
|
||||
return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot);
|
||||
return new BoolTriple(isLegal,
|
||||
(candidateType == "single-sanskrit-gc"),
|
||||
isLegalAndHasAVowelOnRoot);
|
||||
}
|
||||
|
||||
private static final boolean ddebug = false;
|
||||
|
@ -232,11 +234,15 @@ class TStackList {
|
|||
}
|
||||
|
||||
/** Too simple to comment. */
|
||||
class BoolPair {
|
||||
class BoolTriple {
|
||||
boolean isLegal;
|
||||
boolean isLegalButSanskrit; // some subset are legal but legal Sanskrit -- the single sanskrit stacks are this way, such as B+DE.
|
||||
boolean isLegalAndHasAVowelOnRoot;
|
||||
BoolPair(boolean isLegal, boolean isLegalAndHasAVowelOnRoot) {
|
||||
BoolTriple(boolean isLegal,
|
||||
boolean isLegalButSanskrit,
|
||||
boolean isLegalAndHasAVowelOnRoot) {
|
||||
this.isLegal = isLegal;
|
||||
this.isLegalButSanskrit = isLegalButSanskrit;
|
||||
this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue