Made Jskad smart enough to handle a few more EWTS characters; some of them
it can only convert to Wylie, while others are live key sequences.  This will
make converting the shechen documents go more smoothly.
This commit is contained in:
dchandler 2003-06-09 13:35:43 +00:00
parent 09a55110b7
commit 189fef9aec
2 changed files with 72 additions and 35 deletions

View file

@ -233,6 +233,23 @@ public class TibetanMachineWeb implements THDLWylieConstants {
}
}
/** Returns the next token in st with the first occurrence of
    __TILDE__ replaced with ~.  Needed because the DELIMITER is ~,
    so a literal tilde cannot appear unescaped in a token.
    Appends the escaped token to sb iff an escape sequence
    occurred; sb is left untouched otherwise, so callers can test
    sb's length to learn whether the token was escaped.
    @param st the tokenizer to pull the next token from
    @param sb receives the unescaped-tilde form of the token when
    (and only when) the token contained __TILDE__
    @return the token, with its first __TILDE__ (if any) replaced
    by ~
    @throws java.util.NoSuchElementException if st has no more
    tokens */
private static String getEscapedToken(StringTokenizer st,
                                      StringBuffer sb) {
    final String escapeSequence = "__TILDE__";
    String unescaped = st.nextToken();
    int start = unescaped.indexOf(escapeSequence);
    if (start < 0) {
        return unescaped;
    }
    StringBuffer x = new StringBuffer(unescaped);
    // StringBuffer.replace takes an END INDEX, not a length, so the
    // end must be offset by start; otherwise an escape sequence
    // that is not at the very beginning of the token is mangled
    // (or triggers a StringIndexOutOfBoundsException).
    x.replace(start, start + escapeSequence.length(), "~");
    String escaped = x.toString();
    sb.append(escaped);
    return escaped;
}
/**
* This method reads the data file ("tibwn.ini"), constructs
* the character, punctuation, and vowel lists, as well as
@ -379,14 +396,21 @@ public class TibetanMachineWeb implements THDLWylieConstants {
int k = 0;
StringBuffer escapedToken = new StringBuffer("");
ThdlDebug.verify(escapedToken.length() == 0);
while (st.hasMoreTokens()
&& (!ignore || (k <= 3 /* 3 from 'case 3:' */))) {
String val = st.nextToken();
String val = getEscapedToken(st, escapedToken);
if (val.equals(DELIMITER))
if (val.equals(DELIMITER)
&& escapedToken.length() == 0) {
k++;
} else if (!val.equals("")) {
if (escapedToken.length() != 0) {
escapedToken = new StringBuffer("");
ThdlDebug.verify(escapedToken.length() == 0);
}
else if (!val.equals("")) {
switch (k) {
case 0: //wylie key
if (!ignore) {
@ -476,8 +500,11 @@ public class TibetanMachineWeb implements THDLWylieConstants {
tibHash.put(wylie, duffCodes);
}
int font = duffCodes[2].getFontNum();
int code = duffCodes[2].getCharNum()-32;
if (null == duffCodes[TMW])
throw new Error(fileName
+ " has a line with wylie " + wylie + " but no TMW; that's not allowed");
int font = duffCodes[TMW].getFontNum();
int code = duffCodes[TMW].getCharNum()-32;
toHashKey[font][code] = wylie;
}
}
@ -863,9 +890,8 @@ public static boolean hasGlyph(String hashKey) {
*/
public static DuffCode getGlyph(String hashKey) {
    final DuffCode[] glyphs = (DuffCode[]) tibHash.get(hashKey);
    if (glyphs != null) {
        // The TMW slot of the entry is the glyph handed back to the caller.
        return glyphs[TMW];
    }
    // No entry for hashKey.  The usual cause is a tibwn.ini that lists
    // a key (e.g. M as punctuation) without a matching 'M~...' line.
    throw new Error("It is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
}

View file

@ -21,13 +21,13 @@ k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N
a,i,u,e,o,I,U,ai,au,A,-i,-I
<?Other?>
_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#
// FIXME: add these etc.: M^,<,>,{,},[,],?,~
_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#,?,=,[,],<,>,{,},*
// FIXME: add these etc.: M^,~,~^
<?Input:Punctuation?>
//_~32,1~0,32
// ~45,1~0,45~~~~~~~0F0B
_~32,1~~1,32
_~32,1~~1,32~~~~~~~0020
~45,1~~1,45~~~~~~~0F0B
/~202,1~~1,107~~~~~~~0F0D
|~203,1~~1,103~~~~~~~0F11
@ -42,10 +42,29 @@ $~38,5~~9,41~~~~~~~0F06
)~209,1~~9,94~~~~~~~0F3D
H~239,1~~8,92~~~~~~~0F7F
M~238,1~~8,90~~~~~~~0F7E
`~241,1~~8,94~~~~~~~0F83
// I thought EWTS said 0F83 was M^, not `
&~177,4~~8,93~~~~~~~0F85
@#~201,1~~9,40~~~~~~~0F04,0F05
// abbreviation sign:
=~207,1~~1,106~~~~~~~0F34
// virama:
?~252,1~~8,105~~~~~~~0F84
// rgya.gram.shad:
]~48,5~~9,51~~~~~~~0F12
// shad + single ornament:
[~43,5~~9,47~~~~~~~0F10
// gug.rtags.gyon:
<~95,5~~9,95~~~~~~~0F3A
// gug.rtags.gyas:
>~96,5~~9,96~~~~~~~0F3B
// yar.tshes.rtags:
{~134,5~~10,39~~~~~~~0F3E
// mar.tshes.rtags
}~135,5~~10,40~~~~~~~0F3F
// dzud.rtags.me.long.can:
`~94,5~~9,92~~~~~~~0F13
// hard tsheg:
*~205,1~~1,108~~~~~~~0F0C
<?Input:Tibetan?>
// FIXME: 0F5F,0F39 might work, but the OpenType font's author must've had Dza
@ -687,26 +706,30 @@ a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114
9+1/2~79,5~~9,81~~~~~~~0F32
<?ToWylie?>
// FIXME: make this punctuation:
M^~241,1~~8,94~~~~~~~0F83
// bindu + datse + thigle (FIXME: make this punctuation):
__TILDE__^~242,1~~8,95~~~~~~~0F82
//punctuation
_~32,1~~1,32
_~32,1~~1,32~~~~~~~0020
~45,1~~1,45~~~~~~~0F0B
_~32,1~~2,32
_~32,1~~2,32~~~~~~~0020
~45,1~~2,45~~~~~~~0F0B
_~32,2~~3,32
_~32,2~~3,32~~~~~~~0020
~45,2~~3,45~~~~~~~0F0B
_~32,2~~4,32
_~32,2~~4,32~~~~~~~0020
~45,2~~4,45~~~~~~~0F0B
_~32,3~~5,32
_~32,3~~5,32~~~~~~~0020
~45,3~~5,45~~~~~~~0F0B
_~32,3~~6,32
_~32,3~~6,32~~~~~~~0020
~45,3~~6,45~~~~~~~0F0B
_~32,4~~7,32
_~32,4~~7,32~~~~~~~0020
~45,4~~7,45~~~~~~~0F0B
_~32,4~~8,32
_~32,4~~8,32~~~~~~~0020
~45,4~~8,45~~~~~~~0F0B
_~32,5~~9,32
_~32,5~~9,32~~~~~~~0020
~45,5~~9,45~~~~~~~0F0B
_~32,5~~10,32
_~32,5~~10,32~~~~~~~0020
~45,5~~10,45~~~~~~~0F0B
//bindus
@ -895,11 +918,8 @@ nyi.zla~91,5~~9,89
kuruka~92,5~~9,90
no name~93,5~~9,91
dzud.rtags.me.long.can~94,5~~9,92~~~~~~~0F13
dbu.khang.g-yon~208,1~~9,93~~~~~~~0F3C
dbu.khang.g-yas~209,1~~9,94~~~~~~~0F3D
gug.rtags.gyon~95,5~~9,95~~~~~~~0F3A
gug.rtags.gyas~96,5~~9,96~~~~~~~0F3B
yungs.drung (reversed)~97,5~~9,97
yungs.drung (standard)~98,5~~9,98
@ -938,13 +958,10 @@ logo sign lhag.rtags~130,5~~10,35~~~~~~~0F16
sgra.gcan.char.rtags~131,5~~10,36~~~~~~~0F17
khyud.pa~132,5~~10,37~~~~~~~0F18
sdong.tshugs~133,5~~10,38~~~~~~~0F19
yar.tshes.rtags~134,5~~10,39~~~~~~~0F3E
mar.tshes.rtags~135,5~~10,40~~~~~~~0F3F
rinchen shad~203,1~~1,103~~~~~~~0F11
sbrul shad~204,1~~1,104~~~~~~~0F08
gter tsheg~206,1~~1,105~~~~~~~0F14
abbreviation sign~207,1~~1,106~~~~~~~0F34
utsama ka~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113
utsama kha~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113
@ -984,7 +1001,6 @@ nor.bu.bzhi.khyil~198,5~~10,70~~~~~~~0FCC
small bindu~~~8,91~~~~~~~0F7E
bindu + datse~241,1~~8,94~~~~~~~0F83
bindu + datse + thigle~242,1~~8,95~~~~~~~0F82
bindu + kigu~243,1~~8,96
bindu + short gigu~244,1~~8,97
bindu + log yig gigu~245,1~~8,98
@ -994,7 +1010,6 @@ bindu + double drengbu~248,1~~8,101
bindu + normal naro~249,1~~8,102
bindu + raised naro~250,1~~8,103
bindu + double naro~251,1~~8,104
virama~252,1~~8,105~~~~~~~0F84
zhu.yig.mgo.rgyan~33,5~~9,33~~~~~~~0F0A
bka'.shog.mgo.rgyan~34,5~~9,34
@ -1010,13 +1025,9 @@ Yig.mgo.tsheg.shad~39,5~~9,42~~~~~~~0F07
shad + single tsheg~40,5~~9,43~~~~~~~0F0F
shad (hooked) + single tsheg~41,5~~9,44
shad + double tsheg~42,5~~9,46
shad + single ornament~43,5~~9,47~~~~~~~0F10
sbrul.shad + single ornament~44,5~~9,48
sbrul.shad + double ornament~46,5~~9,49
sbrul.shad variant form~47,5~~9,50
rgya.gram.shad~48,5~~9,51~~~~~~~0F12
hard tsheg~205,1~~1,108~~~~~~~0F0C
reversed hu~147,5~~10,43
Inverted ha~148,5~~10,44