Jskad supports <7, >8, etc. again; it no longer supports the punctuation

'<' and '>'.  The current keyboard implementation makes this an either-or
proposition, when fundamentally it need not be.

Added a <?Numbers?> command and an <?Input:Numbers?> command to
tibwn.ini; broke the numbers apart from the consonants.  This facilitates the
new-and-improved Tibetan->Wylie conversion.

Tibetan->Wylie is now done by forming legal tsheg-bars.  A legal tsheg-bar
is converted into perfect THDL Wylie.  See the code comments to learn what
it thinks is a legal tsheg-bar, but it includes, e.g., bskyUMbsH minus the
trailing punctuation (H).

Illegal sequences, such as runs of transliterated Sanskrit, are turned into
unambiguous Wylie; each glyph is followed by a vowel or a disambiguator
('.').

I've made it so that the illegal sequences are as beautiful as possible.  You
get 'pad+me', for example, not the equivalent but uglier 'pad+m.e.'.
This commit is contained in:
dchandler 2003-07-08 14:30:17 +00:00
parent c04a3f189b
commit 02558a1d78
6 changed files with 878 additions and 479 deletions

View file

@ -42,7 +42,7 @@ public class DuffPaneTest extends TestCase {
// We don't want to load the TM or TMW font files ourselves:
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
ThdlOptions.setUserPreference("thdl.do.not.rely.on.system.tm.fonts", false);
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
ThdlOptions.setUserPreference("thdl.debug", true);
dp = new DuffPane();
@ -102,11 +102,23 @@ public class DuffPaneTest extends TestCase {
* and then converting the result to Extended Wylie. */
public void testWylieToIRToWylie() {
ensureKeysGiveCorrectWylie("kue ");
ensureKeysGiveCorrectWylie("<8<7<0 ");
ensureKeysGiveCorrectWylie("012345678901234 ");
ensureKeysGiveCorrectWylie("ka<7 ",
"ka<7. ");
ensureKeysGiveCorrectWylie("ka <7 ");
ensureKeysGiveCorrectWylie("ka>7 ",
"ka>7. ");
ensureKeysGiveCorrectWylie("ka >7 ");
// DLC FIXME : M^ doesn't work. nga, na do, k,kh do, why not M, M^?
ensureKeysGiveCorrectWylie("kuau ");
ensureKeysGiveCorrectWylie("ku-i ");
ensureKeysGiveCorrectWylie("kuai ");
ensureKeysGiveCorrectWylie("cuig ");
ensureKeysGiveCorrectWylie("kcuig ");
ensureKeysGiveCorrectWylie("kcuig ",
"kacuiga ");
ensureKeysGiveCorrectWylie("gcuig ");
ensureKeysGiveCorrectWylie("gcuigs'e'i'i'o'am'ang'e'o'u'am'am ");
ensureKeysGiveCorrectWylie("nga ");
ensureKeysGiveCorrectWylie("nga /");
@ -144,17 +156,20 @@ public class DuffPaneTest extends TestCase {
ensureKeysGiveCorrectWylie("blar.d");
ensureKeysGiveCorrectWylie("blarad",
"blar.d");
ensureKeysGiveCorrectWylie("b.lard");
ensureKeysGiveCorrectWylie("b.lard",
"balarda");
ensureKeysGiveCorrectWylie("b.lal.d");
ensureKeysGiveCorrectWylie("blald",
"blalda");
ensureKeysGiveCorrectWylie("b.lald");
ensureKeysGiveCorrectWylie("b.lald",
"balalda");
ensureKeysGiveCorrectWylie("b.las.d");
ensureKeysGiveCorrectWylie("blasd",
"blasda");
ensureKeysGiveCorrectWylie("b.lasd");
ensureKeysGiveCorrectWylie("b.lasd",
"balasda");
ensureKeysGiveCorrectWylie("b.lag");
ensureKeysGiveCorrectWylie("blg",
@ -233,7 +248,7 @@ public class DuffPaneTest extends TestCase {
ensureKeysGiveCorrectWylie("b.lags");
ensureKeysGiveCorrectWylie("blags");
// DLC add b-r-g-s, b-l-g-s,
// DLC FIXME: add b-r-g-s, b-l-g-s, etc.
ensureKeysGiveCorrectWylie("mngas",
@ -268,15 +283,43 @@ public class DuffPaneTest extends TestCase {
ensureKeysGiveCorrectWylie("skalazasa");
ensureKeysGiveCorrectWylie("jskad",
"jaskada");
}
{
// These are incorrectly handled in terms of
// makeIllegalTibetanGoEndToEnd. DLC FIXME.
ensureKeysGiveCorrectWylie("jeskad",
"jeskd");
ensureKeysGiveCorrectWylie("jeskd");
"jeskada");
ensureKeysGiveCorrectWylie("jeskd",
"jesakada");
ensureKeysGiveCorrectWylie("jesakada",
"jeskd");
"jesakada");
}
{
// DLC FIXME: ai gives a.ai, a.i is required to get ai.
// DLC FIXME: haaa doesn't get you h.a., neither does
// ha.a; achen is tough to get.
}
ensureKeysGiveCorrectWylie("heM hiM h-iM heM haiM hoM hauM hUM ");
ensureKeysGiveCorrectWylie("hi.M ho.M he.M hu.M",
"hiM hoM heM huM");
ensureKeysGiveCorrectWylie("brgwU-imd");
ensureKeysGiveCorrectWylie("pad+me");
ensureKeysGiveCorrectWylie("pad+men+b+h+yuM");
ensureKeysGiveCorrectWylie("bskyUMbs");
ensureKeysGiveCorrectWylie("bskyUMbsHgro ");
ensureKeysGiveCorrectWylie("favakakhagangacachajanyatathadanapaphabamatsatshadzawazhaza'ayaralashasahaTaThaDaNaSha");
ensureKeysGiveCorrectWylie("fevekekhegengecechejenyetethedenepephebemetsetshedzewezheze'eyerelesheseheTeTheDeNeShe");
ensureKeysGiveCorrectWylie("fuvukukhugungucuchujunyututhudunupuphubumutsutshudzuwuzhuzu'uyurulushusuhuTuThuDuNuShu");
ensureKeysGiveCorrectWylie("fovokokhogongocochojonyotothodonopophobomotsotshodzowozhozo'oyoroloshosohoToThoDoNoSho");
ensureKeysGiveCorrectWylie("faivaikaikhaigaingaicaichaijainyaitaithaidainaipaiphaibaimaitsaitshaidzaiwaizhaizai'aiyairailaishaisaihaiTaiThaiDaiNaiShai");
ensureKeysGiveCorrectWylie("fauvaukaukhaugaungaucauchaujaunyautauthaudaunaupauphaubaumautsautshaudzauwauzhauzau'auyauraulaushausauhauTauThauDauNauShau");
ensureKeysGiveCorrectWylie("fivikikhigingicichijinyitithidinipiphibimitsitshidziwizhizi'iyirilishisihiTiThiDiNiShi");
ensureKeysGiveCorrectWylie("don't touch my coffee/that makes me very angry/supersize my drink",
"dona'ata tocha mya cofafe/thata mkes me veraya angaraya/superasize mya drinaka");
}
}

View file

@ -152,7 +152,11 @@ public final class DuffCode {
/**
* @return a string representation of this object */
public String toString() {
return "<duffcode font=" + fontNum
boolean[] err = new boolean[] { false };
String wylie = TibetanMachineWeb.getWylieForGlyph(this, err);
if (err[0]) wylie = "undefined";
return "<duffcode wylie="
+ wylie + " font=" + fontNum
+ " charNum=" + charNum + " character="
+ new Character(getCharacter()).toString() + "/>";
}
@ -160,7 +164,11 @@ public final class DuffCode {
* @param TMW if this DuffCode represents a TMW glyph, not a TM glyph
* @return a string representation of this object */
public String toString(boolean TMW) {
return "<duffcode font="
boolean[] err = new boolean[] { false };
String wylie = TibetanMachineWeb.getWylieForGlyph(this, err);
if (err[0]) wylie = "undefined";
return "<duffcode wylie="
+ wylie + " font="
+ (TMW
? TibetanMachineWeb.tmwFontNames
: TibetanMachineWeb.tmFontNames)[fontNum]

View file

@ -41,6 +41,12 @@ public interface THDLWylieConstants {
* the Wylie disambiguating key, as a char
*/
public static final char WYLIE_DISAMBIGUATING_KEY = '.';
/**
 * The Wylie disambiguating key as a one-character String; holds the
 * same character as WYLIE_DISAMBIGUATING_KEY.
 */
public static final String WYLIE_DISAMBIGUATING_KEY_STRING
    = String.valueOf(WYLIE_DISAMBIGUATING_KEY);
/**
* the Wylie for the invisible 'a' vowel
*/

File diff suppressed because it is too large Load diff

View file

@ -60,6 +60,9 @@ public class TibetanMachineWeb implements THDLWylieConstants {
private static TibetanKeyboard keyboard = null;
private static Set charSet = null;
private static Set tibSet = null;
private static Set sanskritStackSet = null;
private static Set numberSet = null;
private static Set vowelSet = null;
private static Set puncSet = null;
private static Set topSet = null;
@ -346,26 +349,64 @@ public class TibetanMachineWeb implements THDLWylieConstants {
}
String line;
boolean hashOn = false;
boolean isSanskrit = false; //FIXME: this is never read.
// is this a Tibetan consonant or consonant stack?
boolean isTibetan = false;
// is this a Sanskrit consonant stack?
boolean isSanskrit = false;
boolean ignore = false;
tibSet = new HashSet();
sanskritStackSet = new HashSet();
while ((line = in.readLine()) != null) {
if (line.startsWith("<?")) { //line is command
if (line.equalsIgnoreCase("<?Consonants?>")) {
isSanskrit = false;
isTibetan = true;
hashOn = false;
ignore = false;
line = in.readLine();
charSet = new HashSet();
if (null == charSet) charSet = new HashSet();
StringTokenizer st = new StringTokenizer(line,",");
while (st.hasMoreTokens()) {
String ntk;
charSet.add(ntk = st.nextToken());
tibSet.add(ntk);
validInputSequences.put(ntk, anyOldObjectWillDo);
}
}
else if (line.equalsIgnoreCase("<?Numbers?>")) {
// FIXME: for historical reasons, numbers go
// in both charSet and numberSet.
isSanskrit = false;
isTibetan = false;
hashOn = false;
ignore = false;
line = in.readLine();
if (null == charSet) charSet = new HashSet();
numberSet = new HashSet();
StringTokenizer st = new StringTokenizer(line,",");
while (st.hasMoreTokens()) {
String ntk;
// DLC FIXME: don't add it to numberSet
// and charSet here; do it in
// <?Input:Numbers?> so that Jskad has the
// same TMW->Wylie conversion regardless
// of whether or not it chooses to support
// inputting numbers.
numberSet.add(ntk = st.nextToken());
charSet.add(ntk);
validInputSequences.put(ntk, anyOldObjectWillDo);
}
}
else if (line.equalsIgnoreCase("<?Vowels?>")) {
isSanskrit = false;
isTibetan = false;
hashOn = false;
ignore = false;
line = in.readLine();
vowelSet = new HashSet();
StringTokenizer st = new StringTokenizer(line,",");
@ -377,7 +418,9 @@ public class TibetanMachineWeb implements THDLWylieConstants {
}
else if (line.equalsIgnoreCase("<?Other?>")) {
isSanskrit = false;
isTibetan = false;
hashOn = false;
ignore = false;
line = in.readLine();
puncSet = new HashSet();
StringTokenizer st = new StringTokenizer(line,",");
@ -389,29 +432,47 @@ public class TibetanMachineWeb implements THDLWylieConstants {
}
else if (line.equalsIgnoreCase("<?Input:Punctuation?>")
|| line.equalsIgnoreCase("<?Input:Vowels?>")
|| line.equalsIgnoreCase("<?Input:Tibetan?>")) {
|| line.equalsIgnoreCase("<?Input:Vowels?>")) {
isSanskrit = false;
isTibetan = false;
hashOn = true;
ignore = false;
}
else if (line.equalsIgnoreCase("<?Input:Tibetan?>")) {
isSanskrit = false;
isTibetan = true;
hashOn = true;
ignore = false;
}
else if (line.equalsIgnoreCase("<?Input:Numbers?>")) {
isSanskrit = false;
isTibetan = false;
hashOn = true;
ignore = false;
}
else if (line.equalsIgnoreCase("<?Input:Sanskrit?>")) {
isSanskrit = true;
isTibetan = false;
hashOn = true;
ignore = false;
}
else if (line.equalsIgnoreCase("<?ToWylie?>")) {
isSanskrit = false;
isTibetan = false;
hashOn = false;
ignore = false;
}
else if (line.equalsIgnoreCase("<?Ignore?>"))
else if (line.equalsIgnoreCase("<?Ignore?>")) {
isSanskrit = false;
ignore = true;
}
}
else if (line.startsWith("//")) //comment
else if (line.startsWith("//")) { //comment
;
else if (line.equals("")) //empty string
}
else if (line.equals("")) {//empty string
;
}
else {
StringTokenizer st = new StringTokenizer(line,DELIMITER,true);
@ -559,6 +620,21 @@ public class TibetanMachineWeb implements THDLWylieConstants {
if (hashOn) {
tibHash.put(wylie, duffCodes);
}
if (isTibetan) {
// Delete the dashes:
StringBuffer wylieWithoutDashes = new StringBuffer(wylie);
for (int wl = 0; wl < wylieWithoutDashes.length(); wl++) {
if (wylieWithoutDashes.charAt(wl) == '-') {
wylieWithoutDashes.deleteCharAt(wl);
--wl;
}
}
tibSet.add(wylieWithoutDashes.toString());
}
if (isSanskrit) {
sanskritStackSet.add(wylie);
}
if (null == duffCodes[TMW])
throw new Error(fileName
@ -726,13 +802,13 @@ public static boolean isFormatting(char c) {
}
/**
* Checks to see if the passed string
* is a character in the installed keyboard.
* Checks to see if the passed string is a character (a single
* [possibly Sanskrit or va or fa] consonant or a number [possibly
* super- or subscribed]) in the installed keyboard.
*
* @param s the string you want to check
* @return true if s is a character in the current keyboard,
* false if not
*/
* @return true if s is a character in the current keyboard, false if
* not */
public static boolean isChar(String s) {
if (currentKeyboardIsExtendedWylie())
return charSet.contains(s);
@ -741,16 +817,58 @@ public static boolean isChar(String s) {
}
/**
* Checks to see if the passed string
* is a character in Extended Wylie.
* Checks to see if the passed string is a character (a single
* [possibly Sanskrit or va or fa] consonant or a number [possibly
* super- or subscribed]) in Extended Wylie.
* @param s the string to be checked
* @return true if s is a character in
* Extended Wylie transliteration, false if not
*/
* @return true if s is a character in Extended Wylie transliteration,
* false if not */
public static boolean isWylieChar(String s) {
    // A string counts as an Extended Wylie character exactly when it
    // is a member of charSet.
    final boolean inCharSet = charSet.contains(s);
    return inCharSet;
}
/**
 * Tests whether the given string is the Extended Wylie for a consonant
 * or an unadorned consonant stack, by looking it up in tibSet.
 *
 * @param s the Wylie string to look up
 * @return true if tibSet contains s, false otherwise
 */
public static boolean isWylieTibetanConsonantOrConsonantStack(String s) {
    final boolean inTibSet = tibSet.contains(s);
    return inTibSet;
}
/**
 * Tests whether the given string is the THDL Extended Wylie for a
 * Sanskrit multi-consonant stack, by looking it up in
 * sanskritStackSet.
 *
 * @param s the Wylie string to look up
 * @return true if and only if sanskritStackSet contains s
 */
public static boolean isWylieSanskritConsonantStack(String s) {
    final boolean inSanskritStackSet = sanskritStackSet.contains(s);
    return inSanskritStackSet;
}
/**
 * Tests whether s is the THDL Extended Wylie for one of the legal
 * tsheg-bar appendages: 'i, 'e, 'u, 'o, 'am, or 'ang.  For example,
 * the word le'u (chapter) ends in such an appendage.
 *
 * @param s the Wylie string to check; must not be null
 * @return true if and only if s is exactly one of the six appendages
 */
public static boolean isWylieAchungAppendage(String s) {
    final String[] appendages = { "'e", "'i", "'o", "'u", "'ang", "'am" };
    for (int i = 0; i < appendages.length; i++) {
        if (s.equals(appendages[i])) {
            return true;
        }
    }
    return false;
}
/**
 * Tests whether the given string is a number (possibly super- or
 * subscribed) in Extended Wylie, by looking it up in numberSet.
 *
 * @param s the Wylie string to look up
 * @return true if numberSet contains s, false otherwise
 */
public static boolean isWylieNumber(String s) {
    final boolean inNumberSet = numberSet.contains(s);
    return inNumberSet;
}
/**
* Checks to see if the passed string
* is punctuation in the installed keyboard.
@ -826,6 +944,32 @@ public static boolean isWylieVowel(String s) {
return vowelSet.contains(s);
}
/**
 * Tests whether wylie is the THDL Extended Wylie for an adornment.  An
 * adornment is something that is part of a stack but is not a
 * consonant, such as a Tibetan or Sanskrit vowel or a bindu.  The same
 * string may also be a vowel or punctuation.
 *
 * @param wylie the Wylie string to check
 * @return true if and only if wylie is in vowelSet or is one of M, M^,
 * iM, -iM, eM, aiM, oM, auM
 */
public static boolean isWylieAdornment(String wylie) {
    // Anything in the vowel set is an adornment.
    if (vowelSet.contains(wylie)) {
        return true;
    }
    // Otherwise accept only the two M forms and the vowel+M combinations.
    final String[] otherAdornments = {
        "M",  /* U+0F7E */
        "M^", /* U+0F83 */
        "iM",
        "-iM",
        "eM",
        "aiM",
        "oM",
        "auM"
    };
    for (int i = 0; i < otherAdornments.length; i++) {
        if (wylie.equals(otherAdornments[i])) {
            return true;
        }
    }
    return false;
}
/**
 * Tests whether wylie is the THDL Extended Wylie for an adornment (see
 * {@link #isWylieAdornment(String)}) that contains a vowel within it.
 * The bare forms M and M^ are adornments without a vowel, so they are
 * rejected.
 *
 * @param wylie the Wylie string to check
 * @return true if and only if wylie is an adornment other than M or M^
 */
public static boolean isWylieAdornmentAndContainsVowel(String wylie) {
    if (!isWylieAdornment(wylie)) {
        return false;
    }
    final boolean isBareM = wylie.equals("M") /* U+0F7E */
        || wylie.equals("M^") /* U+0F83 */;
    return !isBareM;
}
/**
* Returns true iff this Wylie is valid as a leftmost character in a
* Tibetan syllable. For example, in the syllable 'brgyad', 'b' is the
@ -839,9 +983,9 @@ public static boolean isWylieLeft(String s) {
}
/**
* Returns true iff this Wylie is valid as a right (post-vowel)
* character in a Tibetan syllable. For example, in the syllable
* 'lags', 'g' is in the right character position. Valid right
* Returns true iff this Wylie is valid as a suffix (i.e., a right
* (post-vowel) character) in a Tibetan syllable. For example, in the
* syllable 'lags', 'g' is in the right character position. Valid right
* characters include g, ng, d, n, b, m, r, l, s, ', and T.
* @param s the (Wylie) string to be checked
* @return true if s is a possible right character in a Tibetan

View file

@ -7,22 +7,27 @@
// - <?x?> marks a command
// - the commands are:
// Consonants - set of consonants in tibetan
// Numbers - set of numbers in tibetan
// Vowels - set of vowels
// Other - other characters: numbers, punctuation, etc.
// Other - other characters: punctuation, etc.
// Input - those codes which serve as the basis for the Wylie input method
// subtypes: Input:Punctuation, Input:Vowels, Input:Tibetan, Input:Sanskrit
// subtypes: Input:Punctuation, Input:Vowels, Input:Tibetan,
// Input:Numbers, Input:Sanskrit
// ToWylie - codes only needed for duff to wylie conversion, including vowels
// Ignore - ignore until another command is reached
<?Consonants?>
k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N,Sh,v,f,Dz,0,1,2,3,4,5,6,7,8,9,>0,>1,>2,>3,>4,>5,>6,>7,>8,>9,<0,<1,<2,<3,<4,<5,<6,<7,<8,<9
k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N,Sh,v,f,Dz
<?Numbers?>
0,1,2,3,4,5,6,7,8,9,>0,>1,>2,>3,>4,>5,>6,>7,>8,>9,<0,<1,<2,<3,<4,<5,<6,<7,<8,<9
<?Vowels?>
a,i,u,e,o,I,U,ai,au,A,-i,-I
<?Other?>
_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#,?,=,[,],<,>,{,},*
// FIXME: add these etc.: M^,~,~^
_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#,?,=,[,],{,},*
// FIXME: add these etc.: M^,~,~^,<,> (< and > cause ka<7 to quit working)
<?Input:Punctuation?>
//_~32,1~0,32
@ -691,6 +696,8 @@ a+y~143,4~~8,63~1,109~8,120~1,123~1,125~8,106~8,113~f68,fb1
a+r~144,4~~8,64~1,109~8,120~1,123~1,125~8,106~8,113~f68,fb2
a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114~f68,fb2,fb1
<?Input:Numbers?>
//numbers
0~190,1~~10,48~~~~~~~0F20
1~191,1~~10,49~~~~~~~0F21