Fixed a bunch of bugs; supports le'u'i'o, sgom pa'am, etc.
Better tests. As part of that, I had to split TibetanMachineWeb into TibetanMachineWeb and THDLWylieConstants, because I don't want the class-wide (static) initialization code in TibetanMachineWeb causing errors in LegalTshegBarTest.
This commit is contained in:
parent
1987f7d80a
commit
33b3080068
7 changed files with 468 additions and 230 deletions
117
source/org/thdl/tib/text/THDLWylieConstants.java
Normal file
117
source/org/thdl/tib/text/THDLWylieConstants.java
Normal file
|
@ -0,0 +1,117 @@
|
||||||
|
/*
|
||||||
|
The contents of this file are subject to the THDL Open Community License
|
||||||
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License on the THDL web site
|
||||||
|
(http://www.thdl.org/).
|
||||||
|
|
||||||
|
Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||||
|
License for the specific terms governing rights and limitations under the
|
||||||
|
License.
|
||||||
|
|
||||||
|
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||||
|
Library (THDL). Portions created by the THDL are Copyright 2001-2003 THDL.
|
||||||
|
All Rights Reserved.
|
||||||
|
|
||||||
|
Contributor(s): ______________________________________.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.thdl.tib.text;
|
||||||
|
|
||||||
|
/** This is where basic, static knowledge of THDL's Extended Wylie is housed.
|
||||||
|
* @see org.thdl.tib.text.TibetanMachineWeb */
|
||||||
|
public interface THDLWylieConstants {
|
||||||
|
/**
|
||||||
|
* the Wylie for bindu/anusvara
|
||||||
|
*/
|
||||||
|
public static final char BINDU = 'M';
|
||||||
|
/**
|
||||||
|
* the Wylie for tsheg
|
||||||
|
*/
|
||||||
|
public static final char TSHEG = ' '; //this character occurs in all ten TMW fonts
|
||||||
|
/**
|
||||||
|
* the Wylie for whitespace
|
||||||
|
*/
|
||||||
|
public static final char SPACE = '_'; //this character occurs in all ten TMW fonts
|
||||||
|
/**
|
||||||
|
* the Sanskrit stacking separator used in Extended Wylie
|
||||||
|
*/
|
||||||
|
public static final char WYLIE_SANSKRIT_STACKING_KEY = '+';
|
||||||
|
/**
|
||||||
|
* the Wylie disambiguating key, as a char
|
||||||
|
*/
|
||||||
|
public static final char WYLIE_DISAMBIGUATING_KEY = '.';
|
||||||
|
/**
|
||||||
|
* the Wylie for the invisible 'a' vowel
|
||||||
|
*/
|
||||||
|
public static final String WYLIE_aVOWEL = "a";
|
||||||
|
/**
|
||||||
|
* the Wylie for achung, as a char
|
||||||
|
*/
|
||||||
|
public static final char ACHUNG_character = '\'';
|
||||||
|
/**
|
||||||
|
* the Wylie for achung, as a String
|
||||||
|
*/
|
||||||
|
public static final String ACHUNG
|
||||||
|
= new String(new char[] { ACHUNG_character });
|
||||||
|
/**
|
||||||
|
* the Wylie for the 28th of the 30 consonants, sa:
|
||||||
|
*/
|
||||||
|
public static final String SA = "s";
|
||||||
|
/**
|
||||||
|
* the Wylie for the 16th of the 30 consonants, ma:
|
||||||
|
*/
|
||||||
|
public static final String MA = "m";
|
||||||
|
/**
|
||||||
|
* the Wylie for the 4th of the 30 consonants, nga:
|
||||||
|
*/
|
||||||
|
public static final String NGA = "ng";
|
||||||
|
/**
|
||||||
|
* the Wylie for achen
|
||||||
|
*/
|
||||||
|
public static final String ACHEN = "a";
|
||||||
|
/**
|
||||||
|
* the Wylie for gigu
|
||||||
|
*/
|
||||||
|
public static final String i_VOWEL = "i";
|
||||||
|
/**
|
||||||
|
* the Wylie for zhebju
|
||||||
|
*/
|
||||||
|
public static final String u_VOWEL = "u";
|
||||||
|
/**
|
||||||
|
* the Wylie for drengbu
|
||||||
|
*/
|
||||||
|
public static final String e_VOWEL = "e";
|
||||||
|
/**
|
||||||
|
* the Wylie for naro
|
||||||
|
*/
|
||||||
|
public static final String o_VOWEL = "o";
|
||||||
|
/**
|
||||||
|
* the Wylie for double drengbu
|
||||||
|
*/
|
||||||
|
public static final String ai_VOWEL = "ai";
|
||||||
|
/**
|
||||||
|
* the Wylie for double naro
|
||||||
|
*/
|
||||||
|
public static final String au_VOWEL = "au";
|
||||||
|
/**
|
||||||
|
* the Wylie for the subscript achung vowel
|
||||||
|
*/
|
||||||
|
public static final String A_VOWEL = "A";
|
||||||
|
/**
|
||||||
|
* the Wylie for log yig gigu
|
||||||
|
*/
|
||||||
|
public static final String reverse_i_VOWEL = "-i";
|
||||||
|
/**
|
||||||
|
* the Wylie for the vowel achung + gigu
|
||||||
|
*/
|
||||||
|
public static final String I_VOWEL = "I";
|
||||||
|
/**
|
||||||
|
* the Wylie for the vowel achung + zhebju
|
||||||
|
*/
|
||||||
|
public static final String U_VOWEL = "U";
|
||||||
|
/**
|
||||||
|
* the Wylie for the vowel achung + log yig gigu
|
||||||
|
*/
|
||||||
|
public static final String reverse_I_VOWEL = "-I";
|
||||||
|
}
|
|
@ -28,7 +28,8 @@ import org.thdl.util.ThdlDebug;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides methods for converting back and forth between Extended
|
* Provides methods for converting back and forth between Extended
|
||||||
* Wylie and TibetanMachineWeb. This class is not instantiable.
|
* Wylie and Tibetan represented in TibetanMachineWeb glyphs. This
|
||||||
|
* class is not instantiable.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* The class provides a variety of static methods for converting
|
* The class provides a variety of static methods for converting
|
||||||
|
@ -37,7 +38,7 @@ import org.thdl.util.ThdlDebug;
|
||||||
* be exported as Rich Text Format.
|
* be exported as Rich Text Format.
|
||||||
*
|
*
|
||||||
* @author Edward Garrett, Tibetan and Himalayan Digital Library */
|
* @author Edward Garrett, Tibetan and Himalayan Digital Library */
|
||||||
public class TibTextUtils {
|
public class TibTextUtils implements THDLWylieConstants {
|
||||||
/** Do not use this constructor. */
|
/** Do not use this constructor. */
|
||||||
private TibTextUtils() { super(); }
|
private TibTextUtils() { super(); }
|
||||||
|
|
||||||
|
@ -255,11 +256,11 @@ public class TibTextUtils {
|
||||||
if (k < 32) //return null if character is just formatting
|
if (k < 32) //return null if character is just formatting
|
||||||
return String.valueOf(c);
|
return String.valueOf(c);
|
||||||
|
|
||||||
if (c == TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY)
|
if (c == WYLIE_DISAMBIGUATING_KEY)
|
||||||
return String.valueOf(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
return String.valueOf(WYLIE_DISAMBIGUATING_KEY);
|
||||||
|
|
||||||
if (c == TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY)
|
if (c == WYLIE_SANSKRIT_STACKING_KEY)
|
||||||
return String.valueOf(TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY);
|
return String.valueOf(WYLIE_SANSKRIT_STACKING_KEY);
|
||||||
|
|
||||||
for (i=offset+1; i<wylie.length()+1; i++) {
|
for (i=offset+1; i<wylie.length()+1; i++) {
|
||||||
s = wylie.substring(offset, i);
|
s = wylie.substring(offset, i);
|
||||||
|
@ -332,7 +333,7 @@ public class TibTextUtils {
|
||||||
|
|
||||||
chars.clear();
|
chars.clear();
|
||||||
|
|
||||||
if (next.equals(String.valueOf(TibetanMachineWeb.BINDU))) {
|
if (next.equals(String.valueOf(BINDU))) {
|
||||||
if (glyphs.isEmpty())
|
if (glyphs.isEmpty())
|
||||||
dc = null;
|
dc = null;
|
||||||
else
|
else
|
||||||
|
@ -369,7 +370,7 @@ public class TibTextUtils {
|
||||||
break vowel_block;
|
break vowel_block;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN);
|
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(ACHEN);
|
||||||
dc = dc_array[TibetanMachineWeb.TMW];
|
dc = dc_array[TibetanMachineWeb.TMW];
|
||||||
glyphs.addAll(getVowel(dc, next));
|
glyphs.addAll(getVowel(dc, next));
|
||||||
}
|
}
|
||||||
|
@ -398,7 +399,7 @@ public class TibTextUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (next.equals(String.valueOf(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY))) {
|
else if (next.equals(String.valueOf(WYLIE_DISAMBIGUATING_KEY))) {
|
||||||
if (!chars.isEmpty())
|
if (!chars.isEmpty())
|
||||||
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
||||||
|
|
||||||
|
@ -406,7 +407,7 @@ public class TibTextUtils {
|
||||||
isSanskrit = false;
|
isSanskrit = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (next.equals(String.valueOf(TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY))) {
|
else if (next.equals(String.valueOf(WYLIE_SANSKRIT_STACKING_KEY))) {
|
||||||
if (!isSanskrit) { //begin sanskrit stack
|
if (!isSanskrit) { //begin sanskrit stack
|
||||||
switch (chars.size()) {
|
switch (chars.size()) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -475,13 +476,13 @@ public class TibTextUtils {
|
||||||
List bindus = new ArrayList();
|
List bindus = new ArrayList();
|
||||||
|
|
||||||
if (null == dc) {
|
if (null == dc) {
|
||||||
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(TibetanMachineWeb.BINDU)));
|
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
|
||||||
return bindus;
|
return bindus;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
|
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
|
||||||
bindus.add(dc);
|
bindus.add(dc);
|
||||||
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(TibetanMachineWeb.BINDU)));
|
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
|
||||||
return bindus;
|
return bindus;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -524,7 +525,7 @@ public class TibTextUtils {
|
||||||
//this vowel doesn't correspond to a glyph -
|
//this vowel doesn't correspond to a glyph -
|
||||||
//so you just return the original context
|
//so you just return the original context
|
||||||
|
|
||||||
if ( vowel.equals(TibetanMachineWeb.WYLIE_aVOWEL) ||
|
if ( vowel.equals(WYLIE_aVOWEL) ||
|
||||||
TibetanMachineWeb.isTopVowel(context_2)) {
|
TibetanMachineWeb.isTopVowel(context_2)) {
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
vowels.add(context_1);
|
||||||
|
@ -537,34 +538,34 @@ public class TibTextUtils {
|
||||||
//these vowels have one invariant form - therefore,
|
//these vowels have one invariant form - therefore,
|
||||||
//dc_context is just returned along with that form
|
//dc_context is just returned along with that form
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.ai_VOWEL)) {
|
if (vowel.equals(ai_VOWEL)) {
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
vowels.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
vowels.add(context_2);
|
||||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ai_VOWEL);
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL);
|
||||||
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
||||||
return vowels;
|
return vowels;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.au_VOWEL)) {
|
if (vowel.equals(au_VOWEL)) {
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
vowels.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
vowels.add(context_2);
|
||||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.au_VOWEL);
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL);
|
||||||
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
||||||
return vowels;
|
return vowels;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.reverse_i_VOWEL)) {
|
if (vowel.equals(reverse_i_VOWEL)) {
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
vowels.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
vowels.add(context_2);
|
||||||
|
|
||||||
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
||||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.reverse_i_VOWEL);
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||||
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,7 +579,7 @@ public class TibTextUtils {
|
||||||
//returned along with the vowel appropriate to
|
//returned along with the vowel appropriate to
|
||||||
//that context
|
//that context
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.i_VOWEL)) {
|
if (vowel.equals(i_VOWEL)) {
|
||||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
||||||
if (null == dc_v && null != context_1) {
|
if (null == dc_v && null != context_1) {
|
||||||
|
@ -597,7 +598,7 @@ public class TibTextUtils {
|
||||||
return vowels;
|
return vowels;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.e_VOWEL)) {
|
if (vowel.equals(e_VOWEL)) {
|
||||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
|
||||||
if (null == dc_v && null != context_1) {
|
if (null == dc_v && null != context_1) {
|
||||||
|
@ -616,7 +617,7 @@ public class TibTextUtils {
|
||||||
return vowels;
|
return vowels;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.o_VOWEL)) {
|
if (vowel.equals(o_VOWEL)) {
|
||||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
|
||||||
if (null == dc_v && null != context_1) {
|
if (null == dc_v && null != context_1) {
|
||||||
|
@ -641,7 +642,7 @@ public class TibTextUtils {
|
||||||
//both u and A cannot be affixed to ordinary k or g, but
|
//both u and A cannot be affixed to ordinary k or g, but
|
||||||
//rather the shortened versions of k and g - therefore,
|
//rather the shortened versions of k and g - therefore,
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.u_VOWEL)) {
|
if (vowel.equals(u_VOWEL)) {
|
||||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
|
||||||
|
@ -660,7 +661,7 @@ public class TibTextUtils {
|
||||||
return vowels;
|
return vowels;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.A_VOWEL)) {
|
if (vowel.equals(A_VOWEL)) {
|
||||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||||
|
@ -680,7 +681,7 @@ public class TibTextUtils {
|
||||||
return vowels;
|
return vowels;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.U_VOWEL)) {
|
if (vowel.equals(U_VOWEL)) {
|
||||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
|
||||||
|
@ -704,7 +705,7 @@ public class TibTextUtils {
|
||||||
//require a change from the previous character,
|
//require a change from the previous character,
|
||||||
//and consist of two glyphs themselves
|
//and consist of two glyphs themselves
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.I_VOWEL)) {
|
if (vowel.equals(I_VOWEL)) {
|
||||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||||
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||||
|
@ -726,11 +727,11 @@ public class TibTextUtils {
|
||||||
return vowels;
|
return vowels;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(TibetanMachineWeb.reverse_I_VOWEL)) {
|
if (vowel.equals(reverse_I_VOWEL)) {
|
||||||
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
||||||
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
||||||
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||||
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.reverse_i_VOWEL);
|
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||||
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
|
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
|
||||||
|
|
||||||
if (null != context_1)
|
if (null != context_1)
|
||||||
|
@ -766,10 +767,10 @@ public class TibTextUtils {
|
||||||
|
|
||||||
/** Returns "a", or the empty string if wylie is already "a" (achen). */
|
/** Returns "a", or the empty string if wylie is already "a" (achen). */
|
||||||
private static String aVowelToUseAfter(String wylie) {
|
private static String aVowelToUseAfter(String wylie) {
|
||||||
if (wylie.equals(TibetanMachineWeb.ACHEN))
|
if (wylie.equals(ACHEN))
|
||||||
return "";
|
return "";
|
||||||
else
|
else
|
||||||
return TibetanMachineWeb.WYLIE_aVOWEL;
|
return WYLIE_aVOWEL;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String unambiguousPostAVowelWylie(String wylie1,
|
private static String unambiguousPostAVowelWylie(String wylie1,
|
||||||
|
@ -781,7 +782,7 @@ public class TibTextUtils {
|
||||||
if (TibetanMachineWeb.isWylieTop(wylie1)
|
if (TibetanMachineWeb.isWylieTop(wylie1)
|
||||||
&& wylie2.equals(/* FIXME: hard-coded */ "d"))
|
&& wylie2.equals(/* FIXME: hard-coded */ "d"))
|
||||||
disambiguator
|
disambiguator
|
||||||
= new String(new char[] { TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY });
|
= new String(new char[] { WYLIE_DISAMBIGUATING_KEY });
|
||||||
return wylie1 + disambiguator + wylie2;
|
return wylie1 + disambiguator + wylie2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -831,13 +832,13 @@ public class TibTextUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, wylie))
|
if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, wylie))
|
||||||
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
sb.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
|
|
||||||
if (!wylie.equals(TibetanMachineWeb.ACHEN)) {
|
if (!wylie.equals(ACHEN)) {
|
||||||
sb.append(wylie);
|
sb.append(wylie);
|
||||||
sb.append(TibetanMachineWeb.WYLIE_aVOWEL);
|
sb.append(WYLIE_aVOWEL);
|
||||||
} else {
|
} else {
|
||||||
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
sb.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
sb.append(wylie);
|
sb.append(wylie);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -861,12 +862,12 @@ public class TibTextUtils {
|
||||||
StringBuffer tailEndWylie = null;
|
StringBuffer tailEndWylie = null;
|
||||||
int effectiveSize = size - 2;
|
int effectiveSize = size - 2;
|
||||||
while (effectiveSize >= 0
|
while (effectiveSize >= 0
|
||||||
&& TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize)).equals(TibetanMachineWeb.ACHUNG)) {
|
&& TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize)).equals(ACHUNG)) {
|
||||||
if (null == tailEndWylie) tailEndWylie = new StringBuffer();
|
if (null == tailEndWylie) tailEndWylie = new StringBuffer();
|
||||||
// prepend:
|
// prepend:
|
||||||
tailEndWylie.insert(0,
|
tailEndWylie.insert(0,
|
||||||
TibetanMachineWeb.ACHUNG
|
ACHUNG
|
||||||
+ aVowelToUseAfter(TibetanMachineWeb.ACHUNG)
|
+ aVowelToUseAfter(ACHUNG)
|
||||||
+ TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1)));
|
+ TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1)));
|
||||||
effectiveSize -= 2;
|
effectiveSize -= 2;
|
||||||
}
|
}
|
||||||
|
@ -893,8 +894,8 @@ public class TibTextUtils {
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i));
|
wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i));
|
||||||
if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, wylie)
|
if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, wylie)
|
||||||
|| (i != 0 && wylie.equals(TibetanMachineWeb.ACHEN)))
|
|| (i != 0 && wylie.equals(ACHEN)))
|
||||||
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
sb.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
|
|
||||||
sb.append(wylie + aVowelToUseAfter(wylie));
|
sb.append(wylie + aVowelToUseAfter(wylie));
|
||||||
lastWylie = wylie;
|
lastWylie = wylie;
|
||||||
|
@ -907,8 +908,8 @@ public class TibTextUtils {
|
||||||
while (i+2 < size) {
|
while (i+2 < size) {
|
||||||
wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i));
|
wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i));
|
||||||
if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, wylie)
|
if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, wylie)
|
||||||
|| (i != 0 && wylie.equals(TibetanMachineWeb.ACHEN)))
|
|| (i != 0 && wylie.equals(ACHEN)))
|
||||||
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
sb.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
|
|
||||||
sb.append(wylie);
|
sb.append(wylie);
|
||||||
lastWylie = wylie;
|
lastWylie = wylie;
|
||||||
|
@ -933,7 +934,11 @@ public class TibTextUtils {
|
||||||
if (TibetanMachineWeb.isWylieLeft(wylie0)) {
|
if (TibetanMachineWeb.isWylieLeft(wylie0)) {
|
||||||
/* is it ambiguous? */
|
/* is it ambiguous? */
|
||||||
if (TibetanMachineWeb.isWylieRight(wylie1)
|
if (TibetanMachineWeb.isWylieRight(wylie1)
|
||||||
&& TibetanMachineWeb.SA.equals(wylie2)) {
|
&& SA.equals(wylie2) /* isWylieFarRight would
|
||||||
|
* work, but the list of
|
||||||
|
* 9 words doesn't have
|
||||||
|
* any ending with d --
|
||||||
|
* all end with s. */) {
|
||||||
/* Yes, this is ambiguous. How do we handle it? See this from Andres:
|
/* Yes, this is ambiguous. How do we handle it? See this from Andres:
|
||||||
|
|
||||||
I'm posting this upon David Chandler's request. According to Lobsang
|
I'm posting this upon David Chandler's request. According to Lobsang
|
||||||
|
@ -1001,14 +1006,14 @@ public class TibTextUtils {
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
// if (disambiguatorNeeded)
|
// if (disambiguatorNeeded)
|
||||||
// sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
// sb.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* no ambiguity. the "a" vowel comes after
|
/* no ambiguity. the "a" vowel comes after
|
||||||
* wylie1. */
|
* wylie1. */
|
||||||
if (TibetanMachineWeb.isAmbiguousWylie(wylie0, wylie1))
|
if (TibetanMachineWeb.isAmbiguousWylie(wylie0, wylie1))
|
||||||
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
sb.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
sb.append(wylie1
|
sb.append(wylie1
|
||||||
+ aVowelToUseAfter(wylie1)
|
+ aVowelToUseAfter(wylie1)
|
||||||
+ wylie2);
|
+ wylie2);
|
||||||
|
@ -1069,8 +1074,8 @@ public class TibTextUtils {
|
||||||
|
|
||||||
if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, currWylie)
|
if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, currWylie)
|
||||||
|| (!lastWylie.equals("")
|
|| (!lastWylie.equals("")
|
||||||
&& currWylie.equals(TibetanMachineWeb.ACHEN)))
|
&& currWylie.equals(ACHEN)))
|
||||||
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
sb.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
|
|
||||||
sb.append(currWylie);
|
sb.append(currWylie);
|
||||||
|
|
||||||
|
@ -1125,7 +1130,7 @@ public class TibTextUtils {
|
||||||
wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i]);
|
wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i]);
|
||||||
|
|
||||||
boolean containsBindu = false;
|
boolean containsBindu = false;
|
||||||
if (wylie.length() > 1 && wylie.charAt(wylie.length()-1) == TibetanMachineWeb.BINDU) {
|
if (wylie.length() > 1 && wylie.charAt(wylie.length()-1) == BINDU) {
|
||||||
char[] cArray = wylie.toCharArray();
|
char[] cArray = wylie.toCharArray();
|
||||||
wylie = new String(cArray, 0, wylie.length()-1);
|
wylie = new String(cArray, 0, wylie.length()-1);
|
||||||
containsBindu = true;
|
containsBindu = true;
|
||||||
|
@ -1157,18 +1162,18 @@ public class TibTextUtils {
|
||||||
} else if (TibetanMachineWeb.isWylieVowel(wylie)) {
|
} else if (TibetanMachineWeb.isWylieVowel(wylie)) {
|
||||||
if (isLastVowel) {
|
if (isLastVowel) {
|
||||||
int len = wylieBuffer.length();
|
int len = wylieBuffer.length();
|
||||||
int A_len = TibetanMachineWeb.A_VOWEL.length();
|
int A_len = A_VOWEL.length();
|
||||||
|
|
||||||
if (wylieBuffer.substring(len-A_len).equals(TibetanMachineWeb.A_VOWEL)) {
|
if (wylieBuffer.substring(len-A_len).equals(A_VOWEL)) {
|
||||||
try {
|
try {
|
||||||
if (wylie.equals(TibetanMachineWeb.i_VOWEL)) {
|
if (wylie.equals(i_VOWEL)) {
|
||||||
wylieBuffer.delete(len-A_len, len);
|
wylieBuffer.delete(len-A_len, len);
|
||||||
wylieBuffer.append(TibetanMachineWeb.I_VOWEL);
|
wylieBuffer.append(I_VOWEL);
|
||||||
isLastVowel = false;
|
isLastVowel = false;
|
||||||
break process_block;
|
break process_block;
|
||||||
} else if (wylie.equals(TibetanMachineWeb.reverse_i_VOWEL)) {
|
} else if (wylie.equals(reverse_i_VOWEL)) {
|
||||||
wylieBuffer.delete(len-A_len, len);
|
wylieBuffer.delete(len-A_len, len);
|
||||||
wylieBuffer.append(TibetanMachineWeb.reverse_I_VOWEL);
|
wylieBuffer.append(reverse_I_VOWEL);
|
||||||
isLastVowel = false;
|
isLastVowel = false;
|
||||||
break process_block;
|
break process_block;
|
||||||
}
|
}
|
||||||
|
@ -1189,7 +1194,7 @@ public class TibTextUtils {
|
||||||
DuffCode top_dc = (DuffCode)glyphList.get(glyphCount-1);
|
DuffCode top_dc = (DuffCode)glyphList.get(glyphCount-1);
|
||||||
String top_wylie = TibetanMachineWeb.getWylieForGlyph(top_dc);
|
String top_wylie = TibetanMachineWeb.getWylieForGlyph(top_dc);
|
||||||
|
|
||||||
if (top_wylie.equals(TibetanMachineWeb.ACHEN)) {
|
if (top_wylie.equals(ACHEN)) {
|
||||||
glyphList.remove(glyphCount-1);
|
glyphList.remove(glyphCount-1);
|
||||||
|
|
||||||
if (glyphCount-1 == 0) {
|
if (glyphCount-1 == 0) {
|
||||||
|
@ -1200,7 +1205,7 @@ public class TibTextUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (top_dc == null || !TibetanMachineWeb.getWylieForGlyph(top_dc).equals(TibetanMachineWeb.ACHUNG)) {
|
if (top_dc == null || !TibetanMachineWeb.getWylieForGlyph(top_dc).equals(ACHUNG)) {
|
||||||
String thisPart = withoutA(glyphList);
|
String thisPart = withoutA(glyphList);
|
||||||
wylieBuffer.append(thisPart); //append consonants in glyphList
|
wylieBuffer.append(thisPart); //append consonants in glyphList
|
||||||
} else {
|
} else {
|
||||||
|
@ -1212,12 +1217,12 @@ public class TibTextUtils {
|
||||||
wylieBuffer.append(thisPart);
|
wylieBuffer.append(thisPart);
|
||||||
}
|
}
|
||||||
|
|
||||||
wylieBuffer.append(TibetanMachineWeb.ACHUNG);
|
wylieBuffer.append(ACHUNG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (insertDisAmbig)
|
if (insertDisAmbig)
|
||||||
wylieBuffer.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
wylieBuffer.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
|
|
||||||
wylieBuffer.append(wylie); //append vowel
|
wylieBuffer.append(wylie); //append vowel
|
||||||
|
|
||||||
|
@ -1234,7 +1239,7 @@ public class TibTextUtils {
|
||||||
if (containsBindu) {
|
if (containsBindu) {
|
||||||
isLastVowel = false;
|
isLastVowel = false;
|
||||||
wylieBuffer.append(withoutA(glyphList));
|
wylieBuffer.append(withoutA(glyphList));
|
||||||
wylieBuffer.append(TibetanMachineWeb.BINDU); //append the bindu
|
wylieBuffer.append(BINDU); //append the bindu
|
||||||
glyphList.clear();
|
glyphList.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,7 @@ License for the specific terms governing rights and limitations under the
|
||||||
License.
|
License.
|
||||||
|
|
||||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||||
Library (THDL). Portions created by the THDL are Copyright 2001 THDL.
|
Library (THDL). Portions created by the THDL are Copyright 2001-2003 THDL.
|
||||||
All Rights Reserved.
|
All Rights Reserved.
|
||||||
|
|
||||||
Contributor(s): ______________________________________.
|
Contributor(s): ______________________________________.
|
||||||
|
@ -44,7 +44,7 @@ import org.thdl.util.ThdlOptions;
|
||||||
* @version 1.0
|
* @version 1.0
|
||||||
*/
|
*/
|
||||||
// FIXME: for speed, make either this class, its methods, or both, final?
|
// FIXME: for speed, make either this class, its methods, or both, final?
|
||||||
public class TibetanMachineWeb {
|
public class TibetanMachineWeb implements THDLWylieConstants {
|
||||||
/** This addresses bug 624133, "Input freezes after impossible
|
/** This addresses bug 624133, "Input freezes after impossible
|
||||||
* character". The input sequences that are valid in Extended
|
* character". The input sequences that are valid in Extended
|
||||||
* Wylie. For example, "Sh" will be in this container, but "S"
|
* Wylie. For example, "Sh" will be in this container, but "S"
|
||||||
|
@ -109,86 +109,6 @@ public class TibetanMachineWeb {
|
||||||
"TibetanMachineWeb9".intern()
|
"TibetanMachineWeb9".intern()
|
||||||
};
|
};
|
||||||
/**
|
/**
|
||||||
* the Wylie for bindu/anusvara
|
|
||||||
*/
|
|
||||||
public static final char BINDU = 'M';
|
|
||||||
/**
|
|
||||||
* the Wylie for tsheg
|
|
||||||
*/
|
|
||||||
public static final char TSHEG = ' '; //this character occurs in all ten TMW fonts
|
|
||||||
/**
|
|
||||||
* the Wylie for whitespace
|
|
||||||
*/
|
|
||||||
public static final char SPACE = '_'; //this character occurs in all ten TMW fonts
|
|
||||||
/**
|
|
||||||
* the Sanskrit stacking separator used in Extended Wylie
|
|
||||||
*/
|
|
||||||
public static final char WYLIE_SANSKRIT_STACKING_KEY = '+';
|
|
||||||
/**
|
|
||||||
* the Wylie disambiguating key, as a char
|
|
||||||
*/
|
|
||||||
public static final char WYLIE_DISAMBIGUATING_KEY = '.';
|
|
||||||
/**
|
|
||||||
* the Wylie for the invisible 'a' vowel
|
|
||||||
*/
|
|
||||||
public static final String WYLIE_aVOWEL = "a";
|
|
||||||
/**
|
|
||||||
* the Wylie for achung
|
|
||||||
*/
|
|
||||||
public static final String ACHUNG = "'";
|
|
||||||
/**
|
|
||||||
* the Wylie for the 28th of the 30 consonants, sa:
|
|
||||||
*/
|
|
||||||
public static final String SA = "s";
|
|
||||||
/**
|
|
||||||
* the Wylie for achen
|
|
||||||
*/
|
|
||||||
public static final String ACHEN = "a";
|
|
||||||
/**
|
|
||||||
* the Wylie for gigu
|
|
||||||
*/
|
|
||||||
public static final String i_VOWEL = "i";
|
|
||||||
/**
|
|
||||||
* the Wylie for zhebju
|
|
||||||
*/
|
|
||||||
public static final String u_VOWEL = "u";
|
|
||||||
/**
|
|
||||||
* the Wylie for drengbu
|
|
||||||
*/
|
|
||||||
public static final String e_VOWEL = "e";
|
|
||||||
/**
|
|
||||||
* the Wylie for naro
|
|
||||||
*/
|
|
||||||
public static final String o_VOWEL = "o";
|
|
||||||
/**
|
|
||||||
* the Wylie for double drengbu
|
|
||||||
*/
|
|
||||||
public static final String ai_VOWEL = "ai";
|
|
||||||
/**
|
|
||||||
* the Wylie for double naro
|
|
||||||
*/
|
|
||||||
public static final String au_VOWEL = "au";
|
|
||||||
/**
|
|
||||||
* the Wylie for the subscript achung vowel
|
|
||||||
*/
|
|
||||||
public static final String A_VOWEL = "A";
|
|
||||||
/**
|
|
||||||
* the Wylie for log yig gigu
|
|
||||||
*/
|
|
||||||
public static final String reverse_i_VOWEL = "-i";
|
|
||||||
/**
|
|
||||||
* the Wylie for the vowel achung + gigu
|
|
||||||
*/
|
|
||||||
public static final String I_VOWEL = "I";
|
|
||||||
/**
|
|
||||||
* the Wylie for the vowel achung + zhebju
|
|
||||||
*/
|
|
||||||
public static final String U_VOWEL = "U";
|
|
||||||
/**
|
|
||||||
* the Wylie for the vowel achung + log yig gigu
|
|
||||||
*/
|
|
||||||
public static final String reverse_I_VOWEL = "-I";
|
|
||||||
/**
|
|
||||||
* represents where in an array of DuffCodes you
|
* represents where in an array of DuffCodes you
|
||||||
* find the TibetanMachine equivalence of a glyph
|
* find the TibetanMachine equivalence of a glyph
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -18,7 +18,7 @@ Contributor(s): ______________________________________.
|
||||||
|
|
||||||
package org.thdl.tib.text.tshegbar;
|
package org.thdl.tib.text.tshegbar;
|
||||||
|
|
||||||
import org.thdl.tib.text.TibetanMachineWeb;
|
import org.thdl.tib.text.THDLWylieConstants;
|
||||||
import org.thdl.util.ThdlDebug;
|
import org.thdl.util.ThdlDebug;
|
||||||
|
|
||||||
/** <p>A LegalTshegBar is a simple Tibetan syllable or a syllable with
|
/** <p>A LegalTshegBar is a simple Tibetan syllable or a syllable with
|
||||||
|
@ -29,7 +29,7 @@ import org.thdl.util.ThdlDebug;
|
||||||
* <ul>
|
* <ul>
|
||||||
*
|
*
|
||||||
* <li>It contains at most one prefix, which must be one of {EWC_ga,
|
* <li>It contains at most one prefix, which must be one of {EWC_ga,
|
||||||
* EWC_da, EWC_ba, EWC_ma, EWC_achen} and must be prefixable to the
|
* EWC_da, EWC_ba, EWC_ma, EWC_achung} and must be prefixable to the
|
||||||
* root letter.</li>
|
* root letter.</li>
|
||||||
*
|
*
|
||||||
* <li>It contains no vocalic modifications</li>
|
* <li>It contains no vocalic modifications</li>
|
||||||
|
@ -39,12 +39,11 @@ import org.thdl.util.ThdlDebug;
|
||||||
*
|
*
|
||||||
* <li>It contains at most one vowel from the set {EWV_a, EWV_i,
|
* <li>It contains at most one vowel from the set {EWV_a, EWV_i,
|
||||||
* EWV_e, EWV_u}, and that vowel is on the root stack. The one
|
* EWV_e, EWV_u}, and that vowel is on the root stack. The one
|
||||||
* exception is that a 'i suffix is permitted (this is a connective
|
* exception is that 'i (i.e., the connective case marker), 'u, and
|
||||||
* case marker).</li>
|
* 'o suffixes are permitted.</li>
|
||||||
*
|
*
|
||||||
* <li>It has at most one suffix, which is a single consonant or the
|
* <li>It has at most one suffix, which is a single consonant or a
|
||||||
* special connective case marker 'i (i.e.,
|
* string consisting of 'i, 'u, 'o, 'am, and 'ang.</li>
|
||||||
* <code>"\u0F60\u0F72"</code>).</li>
|
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
DLC FIXME: we must allow many suffixes. See Andres' e-mail below:
|
DLC FIXME: we must allow many suffixes. See Andres' e-mail below:
|
||||||
|
@ -69,10 +68,8 @@ And also there are cases where they combine. For ex you can have
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* <li>It may contain a EWC_sa or EWC_da postsuffix iff there exists
|
* <li>It may contain a EWC_sa or EWC_da postsuffix iff there exists
|
||||||
* a suffix (and a suffix that is not the special connective case
|
* a suffix (and a suffix that is not based on 'i, 'o, 'u, 'am, or
|
||||||
* marker 'i (i.e., <code>"\u0F60\u0F72"</code>) (DLC FIXME: 'o and
|
* 'ang).</li>
|
||||||
* 'am maybe? I asked in the "Embarrasing error in wylie conversion"
|
|
||||||
* bug report.).</li>
|
|
||||||
*
|
*
|
||||||
* <li>The root stack follows the rules of Tibetan syntax, meaning
|
* <li>The root stack follows the rules of Tibetan syntax, meaning
|
||||||
* that the following holds:
|
* that the following holds:
|
||||||
|
@ -112,7 +109,7 @@ And also there are cases where they combine. For ex you can have
|
||||||
* e.g. p. 548.</p>
|
* e.g. p. 548.</p>
|
||||||
*
|
*
|
||||||
* @author David Chandler */
|
* @author David Chandler */
|
||||||
public class LegalTshegBar
|
public final class LegalTshegBar
|
||||||
extends TshegBar
|
extends TshegBar
|
||||||
implements UnicodeConstants
|
implements UnicodeConstants
|
||||||
{
|
{
|
||||||
|
@ -129,8 +126,8 @@ public class LegalTshegBar
|
||||||
private boolean hasWaZur;
|
private boolean hasWaZur;
|
||||||
/** true iff an a-chung (U+0F71) is under the root syllable. */
|
/** true iff an a-chung (U+0F71) is under the root syllable. */
|
||||||
private boolean hasAChung;
|
private boolean hasAChung;
|
||||||
/** If this is a string, it is of a single codepoint or is equal
|
/** If this is a string, it is of a single codepoint or is a
|
||||||
* to {@link #getConnectiveCaseSuffix()} */
|
* string formed from 'i, 'o, 'u, 'am, and 'ang. */
|
||||||
private String suffix;
|
private String suffix;
|
||||||
/** EW_da, EW_sa, or EW_ABSENT */
|
/** EW_da, EW_sa, or EW_ABSENT */
|
||||||
private char postsuffix;
|
private char postsuffix;
|
||||||
|
@ -236,24 +233,24 @@ public class LegalTshegBar
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns null if there is no suffix, or a string containing the
|
/** Returns null if there is no suffix, or a string containing the
|
||||||
* one consonant or a string <code>"\u0F60\u0F72"</code>
|
* one consonant or a string like <code>"\u0F60\u0F72"</code>
|
||||||
* containing two codepoints in the special case that the suffix
|
* in the case that the suffix
|
||||||
* is that connective case marker {@link
|
* is 'i, 'u'i'o, 'am, 'ang, etc. */
|
||||||
* #getConnectiveCaseSuffix()}. */
|
|
||||||
public String getSuffix() {
|
public String getSuffix() {
|
||||||
return suffix;
|
return suffix;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true iff there is a suffixed consonant or a suffixed
|
/** Returns true iff there is a suffixed consonant or a suffixed
|
||||||
* <code>'i</code> (DLC FIXME). */
|
* string consisting of 'i, 'u, 'o, 'am, and 'ang. */
|
||||||
public boolean hasSuffix() {
|
public boolean hasSuffix() {
|
||||||
return (null != suffix);
|
return (null != suffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true iff there is a single, suffixed consonant. This
|
/** Returns true iff there is a single, suffixed consonant. This
|
||||||
means that suffixes like <code>'am</code>, <code>'i</code>,
|
means that suffixes made from <code>'am</code>,
|
||||||
<code>'u</code>, and <code>'o</code> are not present, but this
|
<code>'ang</code>, <code>'i</code>, <code>'u</code>, and
|
||||||
does not rule out the presence of a postsuffix. */
|
<code>'o</code> are not present, but this does not rule out
|
||||||
|
the presence of a postsuffix. */
|
||||||
public boolean hasSimpleSuffix() {
|
public boolean hasSimpleSuffix() {
|
||||||
return ((null != suffix) && (1 == suffix.length()));
|
return ((null != suffix) && (1 == suffix.length()));
|
||||||
}
|
}
|
||||||
|
@ -280,12 +277,6 @@ public class LegalTshegBar
|
||||||
return (EW_ABSENT != postsuffix);
|
return (EW_ABSENT != postsuffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true iff this syllable has a <code>'i</code>
|
|
||||||
* suffix. */
|
|
||||||
public boolean hasConnectiveCaseMarkerSuffix() {
|
|
||||||
return getSuffix().equals(getConnectiveCaseSuffix());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the root consonant. */
|
/** Returns the root consonant. */
|
||||||
public char getRootLetter() {
|
public char getRootLetter() {
|
||||||
return rootLetter;
|
return rootLetter;
|
||||||
|
@ -324,7 +315,7 @@ public class LegalTshegBar
|
||||||
|
|
||||||
private final static String possibleSuffixes
|
private final static String possibleSuffixes
|
||||||
= new String(new char[] {
|
= new String(new char[] {
|
||||||
EWC_ga, EWC_nga, EWC_da, EWC_na, EWC_ba, EWC_ma, EWC_achen,
|
EWC_ga, EWC_nga, EWC_da, EWC_na, EWC_ba, EWC_ma, EWC_achung,
|
||||||
EWC_ra, EWC_la, EWC_sa
|
EWC_ra, EWC_la, EWC_sa
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -340,18 +331,6 @@ public class LegalTshegBar
|
||||||
// EWSUB_ra_btags.
|
// EWSUB_ra_btags.
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static String connectiveCaseSuffix
|
|
||||||
= new String(new char[] {
|
|
||||||
EWC_achen, EWV_i
|
|
||||||
});
|
|
||||||
|
|
||||||
/** Returns a two-codepoint string consisting of the Unicode
|
|
||||||
* representation of what THDL Extended Wylie calls
|
|
||||||
* <code>'i</code>. */
|
|
||||||
public static String getConnectiveCaseSuffix() {
|
|
||||||
return connectiveCaseSuffix;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final static String thirtyConsonants
|
private final static String thirtyConsonants
|
||||||
= new String(new char[] {
|
= new String(new char[] {
|
||||||
EWC_ga, EWC_kha, EWC_ga, EWC_nga,
|
EWC_ga, EWC_kha, EWC_ga, EWC_nga,
|
||||||
|
@ -359,7 +338,7 @@ public class LegalTshegBar
|
||||||
EWC_ta, EWC_tha, EWC_da, EWC_na,
|
EWC_ta, EWC_tha, EWC_da, EWC_na,
|
||||||
EWC_pa, EWC_pha, EWC_ba, EWC_ma,
|
EWC_pa, EWC_pha, EWC_ba, EWC_ma,
|
||||||
EWC_tsa, EWC_tsha, EWC_dza, EWC_wa,
|
EWC_tsa, EWC_tsha, EWC_dza, EWC_wa,
|
||||||
EWC_zha, EWC_za, EWC_achen, EWC_ya,
|
EWC_zha, EWC_za, EWC_achung, EWC_ya,
|
||||||
EWC_ra, EWC_la, EWC_sha, EWC_sa,
|
EWC_ra, EWC_la, EWC_sha, EWC_sa,
|
||||||
EWC_ha, EWC_a
|
EWC_ha, EWC_a
|
||||||
});
|
});
|
||||||
|
@ -388,10 +367,10 @@ public class LegalTshegBar
|
||||||
<p>This is not very efficient.</p> */
|
<p>This is not very efficient.</p> */
|
||||||
public static String[] getPossibleSuffixParticles() {
|
public static String[] getPossibleSuffixParticles() {
|
||||||
return new String[] {
|
return new String[] {
|
||||||
new String(new char[] { EWC_achen, EWV_i }),
|
new String(new char[] { EWC_achung, EWV_i }),
|
||||||
new String(new char[] { EWC_achen, EWV_o }),
|
new String(new char[] { EWC_achung, EWV_o }),
|
||||||
new String(new char[] { EWC_achen, EWV_u }),
|
new String(new char[] { EWC_achung, EWV_u }),
|
||||||
new String(new char[] { EWC_achen, EWC_ma }),
|
new String(new char[] { EWC_achung, EWC_ma }),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -402,7 +381,7 @@ public class LegalTshegBar
|
||||||
* @see org.thdl.tib.text.tshegbar.UnicodeConstants */
|
* @see org.thdl.tib.text.tshegbar.UnicodeConstants */
|
||||||
public static String getTheFivePrefixes() {
|
public static String getTheFivePrefixes() {
|
||||||
final String s = new String(new char[] {
|
final String s = new String(new char[] {
|
||||||
EWC_ga, EWC_da, EWC_ba, EWC_ma, EWC_achen
|
EWC_ga, EWC_da, EWC_ba, EWC_ma, EWC_achung
|
||||||
});
|
});
|
||||||
ThdlDebug.verify(s.length() == 5); // DLC put this into a JUnit test to avoid the slow-down.
|
ThdlDebug.verify(s.length() == 5); // DLC put this into a JUnit test to avoid the slow-down.
|
||||||
return s;
|
return s;
|
||||||
|
@ -416,27 +395,104 @@ public class LegalTshegBar
|
||||||
|
|
||||||
/** Returns a String containing the nominal Unicode
|
/** Returns a String containing the nominal Unicode
|
||||||
* representations of the ten suffixes. The suffixes are in
|
* representations of the ten suffixes. The suffixes are in
|
||||||
* dictionary order.
|
* dictionary order. This doesn't include oddballs like suffixes
|
||||||
* @see #getConnectiveCaseSuffix()
|
* based on 'i, 'u, 'o, 'am, and 'ang.
|
||||||
* @see org.thdl.tib.text.tshegbar.UnicodeConstants */
|
* @see org.thdl.tib.text.tshegbar.UnicodeConstants */
|
||||||
public static String getTheTenSuffixes() {
|
public static String getTheTenSuffixes() {
|
||||||
final String s = new String(new char[] {
|
final String s = new String(new char[] {
|
||||||
EWC_ga, EWC_nga, EWC_da, EWC_na, EWC_ba,
|
EWC_ga, EWC_nga, EWC_da, EWC_na, EWC_ba,
|
||||||
EWC_ma, EWC_achen, EWC_ra, EWC_la, EWC_sa
|
EWC_ma, EWC_achung, EWC_ra, EWC_la, EWC_sa
|
||||||
});
|
});
|
||||||
ThdlDebug.verify(s.length() == 10); // DLC put this into a JUnit test to avoid the slow-down.
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true iff x is the preferred, nominal Unicode
|
/** Returns true iff x is the preferred, nominal Unicode
|
||||||
* representation of one of the ten suffixes.
|
* representation of one of the ten suffixes.
|
||||||
* @see #getConnectiveCaseSuffix()
|
|
||||||
*/
|
*/
|
||||||
public static boolean isNominalRepresentationOfSimpleSuffix(char x) {
|
public static boolean isNominalRepresentationOfSimpleSuffix(char x) {
|
||||||
return (-1 != getTheTenSuffixes().indexOf(x));
|
return (-1 != getTheTenSuffixes().indexOf(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Legal suffix-like particles, excluding the ten suffixes. If
|
||||||
|
* you add one, pair it with its extended Wylie transliteration so
|
||||||
|
* that a tsheg-bar using it is converted to the Wylie you expect. */
|
||||||
|
private static final String[][] oddball_suffixes = new String[][] {
|
||||||
|
{
|
||||||
|
// connective case marker:
|
||||||
|
new String( new char[] {
|
||||||
|
EWC_achung, EWV_i
|
||||||
|
}),
|
||||||
|
THDLWylieConstants.ACHUNG + THDLWylieConstants.i_VOWEL
|
||||||
|
},
|
||||||
|
{
|
||||||
|
new String( new char[] {
|
||||||
|
EWC_achung, EWV_u
|
||||||
|
}),
|
||||||
|
THDLWylieConstants.ACHUNG + THDLWylieConstants.u_VOWEL
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// in at least one context, this shows end of sentence:
|
||||||
|
new String( new char[] {
|
||||||
|
EWC_achung, EWV_o
|
||||||
|
}),
|
||||||
|
THDLWylieConstants.ACHUNG + THDLWylieConstants.o_VOWEL
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// as in sgom pa'am:
|
||||||
|
new String( new char[] {
|
||||||
|
EWC_achung, EWC_ma
|
||||||
|
}),
|
||||||
|
THDLWylieConstants.ACHUNG + THDLWylieConstants.WYLIE_aVOWEL
|
||||||
|
+ THDLWylieConstants.MA
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// meaning or, as opposed to and:
|
||||||
|
new String( new char[] {
|
||||||
|
EWC_achung, EWC_nga
|
||||||
|
}),
|
||||||
|
THDLWylieConstants.ACHUNG + THDLWylieConstants.WYLIE_aVOWEL
|
||||||
|
+ THDLWylieConstants.NGA
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Returns true iff suffix is 'i, 'o, 'u, 'am, 'ang, or a
|
||||||
|
* concatenation like 'u'i'o. Returns false otherwise (including
|
||||||
|
* the case that suffix is the empty string). */
|
||||||
|
public static boolean isAchungBasedSuffix(String suffix) {
|
||||||
|
int i = 0; // so that the empty string causes false to be returned.
|
||||||
|
while (i == 0 || !suffix.equals("")) {
|
||||||
|
boolean startsWithOneOfThem = false;
|
||||||
|
for (int x = 0; x < oddball_suffixes.length; x++) {
|
||||||
|
if (suffix.startsWith(oddball_suffixes[x][0])) {
|
||||||
|
startsWithOneOfThem = true;
|
||||||
|
suffix = suffix.substring(oddball_suffixes[x][0].length());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!startsWithOneOfThem)
|
||||||
|
return false;
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getTHDLWylieForOddballSuffix(String suffix) {
|
||||||
|
// FIXME: assert that isAchungBasedSuffix
|
||||||
|
StringBuffer wylie = new StringBuffer();
|
||||||
|
while (!suffix.equals("")) {
|
||||||
|
for (int x = 0; x < oddball_suffixes.length; x++) {
|
||||||
|
if (suffix.startsWith(oddball_suffixes[x][0])) {
|
||||||
|
wylie.append(oddball_suffixes[x][1]);
|
||||||
|
suffix = suffix.substring(oddball_suffixes[x][0].length());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return wylie.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Returns true iff the given (rootLetter, subjoinedLetter)
|
/** Returns true iff the given (rootLetter, subjoinedLetter)
|
||||||
combination can accept an additional wa-zur. Only g-r-w,
|
combination can accept an additional wa-zur. Only g-r-w,
|
||||||
d-r-w, and ph-y-w fall into this category according to
|
d-r-w, and ph-y-w fall into this category according to
|
||||||
|
@ -595,8 +651,8 @@ public class LegalTshegBar
|
||||||
* @param subjoinedLetter the optional, subscribed consonant
|
* @param subjoinedLetter the optional, subscribed consonant
|
||||||
* @param suffix the optional suffix, which is null, a String
|
* @param suffix the optional suffix, which is null, a String
|
||||||
* consisting of a single consonant (i.e. a single,
|
* consisting of a single consonant (i.e. a single,
|
||||||
* nondecomposable codepoint) except in the special case that
|
* nondecomposable codepoint), or a string of 'i (U+0F60 U+0F72), 'u, 'o, 'am,
|
||||||
* this is {@link #getConnectiveCaseSuffix()}
|
* and 'ang.
|
||||||
* @param postsuffix the optional postsuffix, which should be
|
* @param postsuffix the optional postsuffix, which should be
|
||||||
* EWC_sa or EWC_da
|
* EWC_sa or EWC_da
|
||||||
* @param errorBuffer if non-null, and if the return code is
|
* @param errorBuffer if non-null, and if the return code is
|
||||||
|
@ -763,13 +819,12 @@ public class LegalTshegBar
|
||||||
} // subjoinedLetter tests
|
} // subjoinedLetter tests
|
||||||
|
|
||||||
// Suffix tests:
|
// Suffix tests:
|
||||||
// DLC NOW -- allow 'o, 'u, 'am, etc.
|
|
||||||
if (null != suffix) {
|
if (null != suffix) {
|
||||||
if (!getConnectiveCaseSuffix().equals(suffix)) {
|
if (!isAchungBasedSuffix(suffix)) {
|
||||||
if (suffix.length() != 1) {
|
if (suffix.length() != 1) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
errorBuf,
|
errorBuf,
|
||||||
"Illegal suffix -- not one of the legal complex suffixes like 'u, 'o, 'i, 'am.");
|
"Illegal suffix -- not one of the legal complex suffixes like 'u, 'o, 'i, 'am, 'ang.");
|
||||||
}
|
}
|
||||||
if (!isNominalRepresentationOfSimpleSuffix(suffix.charAt(0))) {
|
if (!isNominalRepresentationOfSimpleSuffix(suffix.charAt(0))) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
@ -784,6 +839,10 @@ public class LegalTshegBar
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
errorBuf,
|
errorBuf,
|
||||||
"You cannot have a postsuffix unless you also have a suffix.");
|
"You cannot have a postsuffix unless you also have a suffix.");
|
||||||
|
if (isAchungBasedSuffix(suffix))
|
||||||
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
|
"You cannot have a postsuffix if you have a suffix based on 'i, 'o, 'u, 'am, and 'ang.");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EW_ABSENT != headLetter) {
|
if (EW_ABSENT != headLetter) {
|
||||||
|
@ -812,7 +871,9 @@ public class LegalTshegBar
|
||||||
"The head letter sa cannot be used with that root letter.");
|
"The head letter sa cannot be used with that root letter.");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// '\u0F6A' is not a valid head letter, even for
|
// Illegal head letter.
|
||||||
|
//
|
||||||
|
// Note: U+0F6A is not a valid head letter, even for
|
||||||
// "rnya". Use EWC_ra instead.
|
// "rnya". Use EWC_ra instead.
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
errorBuf,
|
errorBuf,
|
||||||
|
@ -827,14 +888,14 @@ public class LegalTshegBar
|
||||||
&& EWV_e != vowel
|
&& EWV_e != vowel
|
||||||
&& EWV_o != vowel)
|
&& EWV_o != vowel)
|
||||||
{
|
{
|
||||||
if (EWC_achen == vowel)
|
if (EWC_achung == vowel)
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
errorBuf,
|
errorBuf,
|
||||||
"The vowel given is not valid. Use EW_ABSENT for the EWC_achen sound.");
|
"The vowel given is not valid. Use EW_ABSENT for the EWC_achung sound.");
|
||||||
if ('\u0F71' == vowel)
|
if ('\u0F71' == vowel)
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
errorBuf,
|
errorBuf,
|
||||||
"a-chung cannot be used in a simple Tibetan syllable."); // DLC FIXME: what about pA?
|
"a-chung can be used, but there is a flag for it; you don't call it the vowel.");
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
errorBuf,
|
errorBuf,
|
||||||
"The vowel given is not valid.");
|
"The vowel given is not valid.");
|
||||||
|
@ -848,9 +909,6 @@ public class LegalTshegBar
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
DLC add a method giving the correct connective case thingy or
|
|
||||||
throwing error if the 'i suffix already appears.
|
|
||||||
|
|
||||||
DLC put in a method that gets pronunciation using Unicode
|
DLC put in a method that gets pronunciation using Unicode
|
||||||
diacritical marks. And another using just US Roman. Note that
|
diacritical marks. And another using just US Roman. Note that
|
||||||
pronunciation is contextual, so have these methods return all
|
pronunciation is contextual, so have these methods return all
|
||||||
|
@ -875,7 +933,7 @@ public class LegalTshegBar
|
||||||
boolean disambiguatorNeeded = false;
|
boolean disambiguatorNeeded = false;
|
||||||
char prefix = getPrefix();
|
char prefix = getPrefix();
|
||||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(prefix));
|
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(prefix));
|
||||||
if (!hasHeadLetter()) {
|
if (!hasHeadLetter() && !hasSubjoinedLetter()) {
|
||||||
if (EWC_ya == rootLetter) {
|
if (EWC_ya == rootLetter) {
|
||||||
if (isConsonantThatTakesYaBtags(prefix))
|
if (isConsonantThatTakesYaBtags(prefix))
|
||||||
disambiguatorNeeded = true;
|
disambiguatorNeeded = true;
|
||||||
|
@ -891,7 +949,7 @@ public class LegalTshegBar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (disambiguatorNeeded)
|
if (disambiguatorNeeded)
|
||||||
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
sb.append(THDLWylieConstants.WYLIE_DISAMBIGUATING_KEY);
|
||||||
}
|
}
|
||||||
if (hasHeadLetter())
|
if (hasHeadLetter())
|
||||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getHeadLetter()));
|
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getHeadLetter()));
|
||||||
|
@ -914,14 +972,14 @@ public class LegalTshegBar
|
||||||
|
|
||||||
// DLC FIXME: are these allowed in legal Tibetan?
|
// DLC FIXME: are these allowed in legal Tibetan?
|
||||||
// EWTS would have special cases for them if so,
|
// EWTS would have special cases for them if so,
|
||||||
// I'd wager...
|
// I'd wager, so I bet they're not.
|
||||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(EW_achung));
|
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(EW_achung_vowel));
|
||||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getVowel()));
|
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getVowel()));
|
||||||
} else {
|
} else {
|
||||||
ThdlDebug.abort("only simple vowels occur in this class, how did this get past internalLegalityTest(..)?");
|
ThdlDebug.abort("only simple vowels occur in this class, how did this get past internalLegalityTest(..)?");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(EW_achung));
|
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(EW_achung_vowel));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (hasExplicitVowel())
|
if (hasExplicitVowel())
|
||||||
|
@ -930,19 +988,34 @@ public class LegalTshegBar
|
||||||
sb.append("a");
|
sb.append("a");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String suf = null;
|
||||||
if (hasSuffix()) {
|
if (hasSuffix()) {
|
||||||
String suf = getSuffix();
|
suf = getSuffix();
|
||||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(suf.charAt(0)));
|
|
||||||
if (suf.length() > 1) {
|
if (suf.length() > 1) {
|
||||||
// DLC assert, don't verify, that the length is two.
|
// pa'am, not pa'm or pa'ama!
|
||||||
// This could change if I learn of more suffix
|
sb.append(getTHDLWylieForOddballSuffix(suf));
|
||||||
// particles.
|
} else {
|
||||||
ThdlDebug.verify(2 == suf.length());
|
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(suf.charAt(0)));
|
||||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(suf.charAt(1)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (hasPostsuffix())
|
if (hasPostsuffix()) {
|
||||||
|
// lar.d, la-ra-da, needs a disambiguator. EWC_sa doesn't
|
||||||
|
// take any head letters, but EWC_da does.
|
||||||
|
boolean disambiguatorNeeded = false;
|
||||||
|
if (getPostsuffix() == EWC_da) {
|
||||||
|
if (suf.length() == 1) {
|
||||||
|
char simpleSuffix = suf.charAt(0);
|
||||||
|
if (EWC_ra == simpleSuffix
|
||||||
|
|| EWC_la == simpleSuffix
|
||||||
|
|| EWC_sa == simpleSuffix) {
|
||||||
|
disambiguatorNeeded = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (disambiguatorNeeded)
|
||||||
|
sb.append(THDLWylieConstants.WYLIE_DISAMBIGUATING_KEY);
|
||||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getPostsuffix()));
|
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getPostsuffix()));
|
||||||
|
}
|
||||||
return sb;
|
return sb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -987,7 +1060,7 @@ public class LegalTshegBar
|
||||||
? "hasAChungOnRootLetter=\"true\""
|
? "hasAChungOnRootLetter=\"true\""
|
||||||
: "")
|
: "")
|
||||||
|
|
||||||
// DLC NOW: what about the root letter a, i.e. \u0F68 ? do we want the EWTS to be 'aa' ?
|
// DLC NOW FIXME: what about the root letter a, i.e. \u0F68 ? do we want the EWTS to be 'aa' ?
|
||||||
+ ("vowel=\""
|
+ ("vowel=\""
|
||||||
+ (hasExplicitVowel()
|
+ (hasExplicitVowel()
|
||||||
? UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getVowel())
|
? UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getVowel())
|
||||||
|
@ -1019,7 +1092,8 @@ public class LegalTshegBar
|
||||||
sb.append(getPrefix());
|
sb.append(getPrefix());
|
||||||
}
|
}
|
||||||
if (hasHeadLetter()) {
|
if (hasHeadLetter()) {
|
||||||
// DLC FIXME this crap won't be true...
|
// DLC NOW FIXME this crap won't be true... it's what we must
|
||||||
|
// convert to, though. Do it.
|
||||||
ThdlDebug.verify(UnicodeUtils.isNonSubjoinedConsonant(getPrefix()));
|
ThdlDebug.verify(UnicodeUtils.isNonSubjoinedConsonant(getPrefix()));
|
||||||
ThdlDebug.verify(UnicodeUtils.isSubjoinedConsonant(getRootLetter()));
|
ThdlDebug.verify(UnicodeUtils.isSubjoinedConsonant(getRootLetter()));
|
||||||
sb.append(getHeadLetter());
|
sb.append(getHeadLetter());
|
||||||
|
@ -1036,8 +1110,8 @@ public class LegalTshegBar
|
||||||
sb.append(EWSUB_wa_zur);
|
sb.append(EWSUB_wa_zur);
|
||||||
}
|
}
|
||||||
if (hasAChungOnRootLetter()) {
|
if (hasAChungOnRootLetter()) {
|
||||||
ThdlDebug.verify('\u0F71' == EW_achung);
|
ThdlDebug.verify('\u0F71' == EW_achung_vowel);
|
||||||
sb.append(EW_achung);
|
sb.append(EW_achung_vowel);
|
||||||
}
|
}
|
||||||
if (hasExplicitVowel()) {
|
if (hasExplicitVowel()) {
|
||||||
sb.append(getVowel());
|
sb.append(getVowel());
|
||||||
|
|
|
@ -38,8 +38,64 @@ public class LegalTshegBarTest extends TestCase implements UnicodeConstants {
|
||||||
junit.textui.TestRunner.run(LegalTshegBarTest.class);
|
junit.textui.TestRunner.run(LegalTshegBarTest.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Tests the getThdlWylie() method to see if we
|
||||||
|
handle "le'u'i'o", "sgom pa'am", "sgom pa'ang", etc.
|
||||||
|
*/
|
||||||
|
public void testGetThdlWylieForLongSuffixLikeThings() {
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_la,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
new String(new char[] {
|
||||||
|
EWC_achung, EWV_u,
|
||||||
|
EWC_achung, EWV_i,
|
||||||
|
EWC_achung, EWV_o
|
||||||
|
}),
|
||||||
|
EW_ABSENT, EWV_e).getThdlWylie().toString().equals("le'u'i'o"));
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_la,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
new String(new char[] {
|
||||||
|
EWC_achung, EWV_u,
|
||||||
|
EWC_achung, EWV_i,
|
||||||
|
EWC_achung, EWV_o,
|
||||||
|
EWC_achung, EWC_ma,
|
||||||
|
EWC_achung, EWC_nga,
|
||||||
|
EWC_achung, EWV_o,
|
||||||
|
EWC_achung, EWC_ma
|
||||||
|
}),
|
||||||
|
EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("la'u'i'o'am'ang'o'am"));
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_pa,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
new String(new char[] { EWC_achung, EWC_ma }),
|
||||||
|
EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("pa'am"));
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_pa,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
new String(new char[] { EWC_achung, EWC_nga }),
|
||||||
|
EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("pa'ang"));
|
||||||
|
}
|
||||||
|
|
||||||
/** Tests the getThdlWylie() method and one of the constructors. */
|
/** Tests the getThdlWylie() method and one of the constructors. */
|
||||||
public void testGetThdlWylie() {
|
public void testGetThdlWylie() {
|
||||||
|
// do we disambiguate when needed?
|
||||||
|
{
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_ga, EWC_ya,
|
||||||
|
false, false, EW_ABSENT, EW_ABSENT, EWV_o).getThdlWylie().toString().equals("gyo"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ga, EW_ABSENT, EWC_ya, EW_ABSENT,
|
||||||
|
false, false, EW_ABSENT, EW_ABSENT, EWV_o).getThdlWylie().toString().equals("g.yo"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EW_ABSENT, EWC_la, EW_ABSENT,
|
||||||
|
false, false, EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("b.lag"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EW_ABSENT, EWC_la, EW_ABSENT,
|
||||||
|
false, false, EWC_ga, EWC_sa, EW_ABSENT).getThdlWylie().toString().equals("b.lags"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EW_ABSENT, EWC_ra, EW_ABSENT,
|
||||||
|
false, false, EWC_ga, EWC_da, EW_ABSENT).getThdlWylie().toString().equals("b.ragd"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EW_ABSENT, EWC_ra, EWC_la,
|
||||||
|
false, false, EWC_ga, EWC_da, EW_ABSENT).getThdlWylie().toString().equals("brlagd"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_ra, EWC_ga, EW_ABSENT,
|
||||||
|
false, false, EWC_ga, EWC_da, EW_ABSENT).getThdlWylie().toString().equals("brgagd"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_la, EWC_ha, EW_ABSENT,
|
||||||
|
false, false, EWC_ga, EWC_da, EW_ABSENT).getThdlWylie().toString().equals("blhagd"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_la, EWC_da, EW_ABSENT,
|
||||||
|
false, false, EWC_ga, EWC_da, EW_ABSENT).getThdlWylie().toString().equals("bldagd"));
|
||||||
|
}
|
||||||
|
|
||||||
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga, EWC_ra,
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga, EWC_ra,
|
||||||
false, true, EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrAols"));
|
false, true, EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrAols"));
|
||||||
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
|
||||||
|
@ -81,6 +137,10 @@ public class LegalTshegBarTest extends TestCase implements UnicodeConstants {
|
||||||
EWC_la, false, false,
|
EWC_la, false, false,
|
||||||
null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("sla"));
|
null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("sla"));
|
||||||
|
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_pa,
|
||||||
|
EW_ABSENT, false, true,
|
||||||
|
null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("pA"));
|
||||||
|
|
||||||
{
|
{
|
||||||
boolean threw = false;
|
boolean threw = false;
|
||||||
try {
|
try {
|
||||||
|
@ -159,4 +219,64 @@ public class LegalTshegBarTest extends TestCase implements UnicodeConstants {
|
||||||
}
|
}
|
||||||
assertTrue(x);
|
assertTrue(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Tests {@link
|
||||||
|
* org.thdl.tib.text.tshegbar.LegalTshegBar#getTheTenSuffixes()}. */
|
||||||
|
public void testGetTheTenSuffixes() {
|
||||||
|
String x = LegalTshegBar.getTheTenSuffixes();
|
||||||
|
assertTrue(x.length() == 10);
|
||||||
|
assertTrue(x.charAt(0) == EWC_ga);
|
||||||
|
assertTrue(x.charAt(4) == EWC_ba);
|
||||||
|
assertTrue(x.charAt(9) == EWC_sa);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Tests {@link
|
||||||
|
* org.thdl.tib.text.tshegbar.LegalTshegBar#isAchungBasedSuffix(String)}. */
|
||||||
|
public void testIsAchungBasedSuffix() {
|
||||||
|
assertTrue(LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWC_nga
|
||||||
|
})));
|
||||||
|
assertTrue(LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWC_ma
|
||||||
|
})));
|
||||||
|
assertTrue(LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWV_i
|
||||||
|
})));
|
||||||
|
assertTrue(LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWV_o
|
||||||
|
})));
|
||||||
|
assertTrue(LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWV_u
|
||||||
|
})));
|
||||||
|
assertTrue(LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWV_u,
|
||||||
|
EWC_achung, EWV_i,
|
||||||
|
EWC_achung, EWV_o
|
||||||
|
})));
|
||||||
|
assertTrue(!LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWV_u,
|
||||||
|
EWC_achung, EWV_i,
|
||||||
|
EWC_achung, EWV_o, /* no EWC_achung, */ EWC_nga
|
||||||
|
})));
|
||||||
|
|
||||||
|
// syntactically illegal, I'd bet, but our algorithm allows it:
|
||||||
|
assertTrue(LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWC_ma,
|
||||||
|
EWC_achung, EWV_i,
|
||||||
|
EWC_achung, EWV_i,
|
||||||
|
EWC_achung, EWV_i,
|
||||||
|
EWC_achung, EWV_o,
|
||||||
|
EWC_achung, EWC_nga,
|
||||||
|
EWC_achung, EWV_o
|
||||||
|
})));
|
||||||
|
|
||||||
|
assertTrue(!LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWC_la
|
||||||
|
})));
|
||||||
|
assertTrue(!LegalTshegBar.isAchungBasedSuffix(new String(new char[] {
|
||||||
|
EWC_achung, EWV_e
|
||||||
|
})));
|
||||||
|
|
||||||
|
assertTrue(!LegalTshegBar.isAchungBasedSuffix(""));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,20 +83,21 @@ public interface UnicodeConstants {
|
||||||
static final char EWC_za = '\u0F5F';
|
static final char EWC_za = '\u0F5F';
|
||||||
/** Note the irregular name. The Extended Wylie representation is
|
/** Note the irregular name. The Extended Wylie representation is
|
||||||
<code>'a</code>. */
|
<code>'a</code>. */
|
||||||
static final char EWC_achen = '\u0F60'; /* DLC NOW is this achen or achung? achen is EWC_a, right? comment it. replace EWC_achen everywhere if you change it. */
|
static final char EWC_achung = '\u0F60';
|
||||||
static final char EWC_ya = '\u0F61';
|
static final char EWC_ya = '\u0F61';
|
||||||
static final char EWC_ra = '\u0F62';
|
static final char EWC_ra = '\u0F62';
|
||||||
static final char EWC_la = '\u0F63';
|
static final char EWC_la = '\u0F63';
|
||||||
static final char EWC_sha = '\u0F64';
|
static final char EWC_sha = '\u0F64';
|
||||||
static final char EWC_sa = '\u0F66';
|
static final char EWC_sa = '\u0F66';
|
||||||
static final char EWC_ha = '\u0F67';
|
static final char EWC_ha = '\u0F67';
|
||||||
|
/** achen, the 30th consonant (and, some say, the fifth vowel) DLC NOW FIXME: rename to EWC_achen */
|
||||||
static final char EWC_a = '\u0F68';
|
static final char EWC_a = '\u0F68';
|
||||||
|
|
||||||
|
|
||||||
/** In the word for father, "pA lags", there is an a-chung (i.e.,
|
/** In the word for father, "pA lags", there is an a-chung (i.e.,
|
||||||
<code>\u0F71</code>). This is the constant for that little
|
<code>\u0F71</code>). This is the constant for that little
|
||||||
guy. */
|
guy. */
|
||||||
static final char EW_achung = '\u0F71';
|
static final char EW_achung_vowel = '\u0F71';
|
||||||
|
|
||||||
|
|
||||||
/* Four of the five vowels, some say, or, others say, "the four
|
/* Four of the five vowels, some say, or, others say, "the four
|
||||||
|
|
|
@ -127,11 +127,12 @@ public class UnicodeGraphemeCluster
|
||||||
/** Returns the THDL Extended Wylie transliteration of this
|
/** Returns the THDL Extended Wylie transliteration of this
|
||||||
grapheme cluster, or null if there is none (which happens for
|
grapheme cluster, or null if there is none (which happens for
|
||||||
a few Tibetan codepoints, if you'll recall). If needsVowel is
|
a few Tibetan codepoints, if you'll recall). If needsVowel is
|
||||||
true, then an "a" will be appended when there is no EW_achung
|
true, then an "a" will be appended when there is no
|
||||||
or explicit simple vowel. If there is an explicit vowel or
|
EW_achung_vowel or explicit simple vowel. If there is an
|
||||||
EW_achung, it will always be present. Note that needsVowel is
|
explicit vowel or EW_achung_vowel, it will always be present.
|
||||||
provided because btags is the preferred THDL Extended Wylie
|
Note that needsVowel is provided because btags is the
|
||||||
for the four contiguous grapheme clusters
|
preferred THDL Extended Wylie for the four contiguous grapheme
|
||||||
|
clusters
|
||||||
<code>"\u0F56\u0F4F\u0F42\u0F66"</code>, and
|
<code>"\u0F56\u0F4F\u0F42\u0F66"</code>, and
|
||||||
needsVowel must be set to false for all but the grapheme
|
needsVowel must be set to false for all but the grapheme
|
||||||
cluster corresponding to <code>\u0F4F</code> if you wish
|
cluster corresponding to <code>\u0F4F</code> if you wish
|
||||||
|
@ -257,7 +258,7 @@ public class UnicodeGraphemeCluster
|
||||||
/** Returns the <i>height</i> for the Tibetan Unicode codepoint x.
|
/** Returns the <i>height</i> for the Tibetan Unicode codepoint x.
|
||||||
This relative height is 0 for a base consonant, digit,
|
This relative height is 0 for a base consonant, digit,
|
||||||
punctuation, mark, or sign. It is -1 for a subjoined
|
punctuation, mark, or sign. It is -1 for a subjoined
|
||||||
consonant, -2 for EWSUB_wa_zur, -3 for EW_achung, +1 for
|
consonant, -2 for EWSUB_wa_zur, -3 for EW_achung_vowel, +1 for
|
||||||
EWV_gigu, and so on according to the height these codepoints
|
EWV_gigu, and so on according to the height these codepoints
|
||||||
appear relative to one another when on the same stack. If two
|
appear relative to one another when on the same stack. If two
|
||||||
codepoints have equal height, they should not exist in the
|
codepoints have equal height, they should not exist in the
|
||||||
|
|
Loading…
Reference in a new issue