Made Jskad smart enough to handle a few more EWTS characters; some of them
it can only convert to Wylie, while others are live key sequences. This will make converting the Shechen documents go more smoothly.
This commit is contained in:
parent
09a55110b7
commit
189fef9aec
2 changed files with 72 additions and 35 deletions
|
@ -233,6 +233,23 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
}
|
||||
}
|
||||
|
||||
/** Returns the next token in st with the first occurrence of
|
||||
__TILDE__ replaced with ~. Needed because the DELIMITER is ~.
|
||||
Appends the escaped token to sb iff an escape sequence
|
||||
occurred. */
|
||||
private static String getEscapedToken(StringTokenizer st,
|
||||
StringBuffer sb) {
|
||||
String unescaped = st.nextToken();
|
||||
int start;
|
||||
if ((start = unescaped.indexOf("__TILDE__")) >= 0) {
|
||||
StringBuffer x = new StringBuffer(unescaped);
|
||||
x.replace(start, "__TILDE__".length(), "~");
|
||||
sb.append(x.toString());
|
||||
return x.toString();
|
||||
} else {
|
||||
return unescaped;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* This method reads the data file ("tibwn.ini"), constructs
|
||||
* the character, punctuation, and vowel lists, as well as
|
||||
|
@ -379,14 +396,21 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
|
||||
int k = 0;
|
||||
|
||||
StringBuffer escapedToken = new StringBuffer("");
|
||||
ThdlDebug.verify(escapedToken.length() == 0);
|
||||
while (st.hasMoreTokens()
|
||||
&& (!ignore || (k <= 3 /* 3 from 'case 3:' */))) {
|
||||
String val = st.nextToken();
|
||||
String val = getEscapedToken(st, escapedToken);
|
||||
|
||||
if (val.equals(DELIMITER))
|
||||
if (val.equals(DELIMITER)
|
||||
&& escapedToken.length() == 0) {
|
||||
k++;
|
||||
} else if (!val.equals("")) {
|
||||
if (escapedToken.length() != 0) {
|
||||
escapedToken = new StringBuffer("");
|
||||
ThdlDebug.verify(escapedToken.length() == 0);
|
||||
}
|
||||
|
||||
else if (!val.equals("")) {
|
||||
switch (k) {
|
||||
case 0: //wylie key
|
||||
if (!ignore) {
|
||||
|
@ -476,8 +500,11 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
tibHash.put(wylie, duffCodes);
|
||||
}
|
||||
|
||||
int font = duffCodes[2].getFontNum();
|
||||
int code = duffCodes[2].getCharNum()-32;
|
||||
if (null == duffCodes[TMW])
|
||||
throw new Error(fileName
|
||||
+ " has a line with wylie " + wylie + " but no TMW; that's not allowed");
|
||||
int font = duffCodes[TMW].getFontNum();
|
||||
int code = duffCodes[TMW].getCharNum()-32;
|
||||
toHashKey[font][code] = wylie;
|
||||
}
|
||||
}
|
||||
|
@ -863,9 +890,8 @@ public static boolean hasGlyph(String hashKey) {
|
|||
*/
|
||||
public static DuffCode getGlyph(String hashKey) {
|
||||
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
|
||||
// If dc is null, then likely you misconfigured tibwn.ini such
|
||||
// that, say, M is expected (i.e., it is listed as,
|
||||
// e.g. punctuation), but no 'M~...' line appears.
|
||||
if (null == dc)
|
||||
throw new Error("It is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
|
||||
return dc[TMW];
|
||||
}
|
||||
|
||||
|
|
|
@ -21,13 +21,13 @@ k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N
|
|||
a,i,u,e,o,I,U,ai,au,A,-i,-I
|
||||
|
||||
<?Other?>
|
||||
_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#
|
||||
// FIXME: add these etc.: M^,<,>,{,},[,],?,~
|
||||
_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#,?,=,[,],<,>,{,},*
|
||||
// FIXME: add these etc.: M^,~,~^
|
||||
|
||||
<?Input:Punctuation?>
|
||||
//_~32,1~0,32
|
||||
// ~45,1~0,45~~~~~~~0F0B
|
||||
_~32,1~~1,32
|
||||
_~32,1~~1,32~~~~~~~0020
|
||||
~45,1~~1,45~~~~~~~0F0B
|
||||
/~202,1~~1,107~~~~~~~0F0D
|
||||
|~203,1~~1,103~~~~~~~0F11
|
||||
|
@ -42,10 +42,29 @@ $~38,5~~9,41~~~~~~~0F06
|
|||
)~209,1~~9,94~~~~~~~0F3D
|
||||
H~239,1~~8,92~~~~~~~0F7F
|
||||
M~238,1~~8,90~~~~~~~0F7E
|
||||
`~241,1~~8,94~~~~~~~0F83
|
||||
// I thought EWTS said 0F83 was M^, not `
|
||||
&~177,4~~8,93~~~~~~~0F85
|
||||
@#~201,1~~9,40~~~~~~~0F04,0F05
|
||||
// abbreviation sign:
|
||||
=~207,1~~1,106~~~~~~~0F34
|
||||
// virama:
|
||||
?~252,1~~8,105~~~~~~~0F84
|
||||
// rgya.gram.shad:
|
||||
]~48,5~~9,51~~~~~~~0F12
|
||||
// shad + single ornament:
|
||||
[~43,5~~9,47~~~~~~~0F10
|
||||
// gug.rtags.gyon:
|
||||
<~95,5~~9,95~~~~~~~0F3A
|
||||
// gug.rtags.gyas:
|
||||
>~96,5~~9,96~~~~~~~0F3B
|
||||
// yar.tshes.rtags:
|
||||
{~134,5~~10,39~~~~~~~0F3E
|
||||
// mar.tshes.rtags
|
||||
}~135,5~~10,40~~~~~~~0F3F
|
||||
// dzud.rtags.me.long.can:
|
||||
`~94,5~~9,92~~~~~~~0F13
|
||||
// hard tsheg:
|
||||
*~205,1~~1,108~~~~~~~0F0C
|
||||
|
||||
|
||||
<?Input:Tibetan?>
|
||||
// FIXME: 0F5F,0F39 might work, but the OpenType font's author must've had Dza
|
||||
|
@ -687,26 +706,30 @@ a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114
|
|||
9+1/2~79,5~~9,81~~~~~~~0F32
|
||||
|
||||
<?ToWylie?>
|
||||
// FIXME: make this punctuation:
|
||||
M^~241,1~~8,94~~~~~~~0F83
|
||||
// bindu + datse + thigle (FIXME: make this punctuation):
|
||||
__TILDE__^~242,1~~8,95~~~~~~~0F82
|
||||
//punctuation
|
||||
_~32,1~~1,32
|
||||
_~32,1~~1,32~~~~~~~0020
|
||||
~45,1~~1,45~~~~~~~0F0B
|
||||
_~32,1~~2,32
|
||||
_~32,1~~2,32~~~~~~~0020
|
||||
~45,1~~2,45~~~~~~~0F0B
|
||||
_~32,2~~3,32
|
||||
_~32,2~~3,32~~~~~~~0020
|
||||
~45,2~~3,45~~~~~~~0F0B
|
||||
_~32,2~~4,32
|
||||
_~32,2~~4,32~~~~~~~0020
|
||||
~45,2~~4,45~~~~~~~0F0B
|
||||
_~32,3~~5,32
|
||||
_~32,3~~5,32~~~~~~~0020
|
||||
~45,3~~5,45~~~~~~~0F0B
|
||||
_~32,3~~6,32
|
||||
_~32,3~~6,32~~~~~~~0020
|
||||
~45,3~~6,45~~~~~~~0F0B
|
||||
_~32,4~~7,32
|
||||
_~32,4~~7,32~~~~~~~0020
|
||||
~45,4~~7,45~~~~~~~0F0B
|
||||
_~32,4~~8,32
|
||||
_~32,4~~8,32~~~~~~~0020
|
||||
~45,4~~8,45~~~~~~~0F0B
|
||||
_~32,5~~9,32
|
||||
_~32,5~~9,32~~~~~~~0020
|
||||
~45,5~~9,45~~~~~~~0F0B
|
||||
_~32,5~~10,32
|
||||
_~32,5~~10,32~~~~~~~0020
|
||||
~45,5~~10,45~~~~~~~0F0B
|
||||
|
||||
//bindus
|
||||
|
@ -895,11 +918,8 @@ nyi.zla~91,5~~9,89
|
|||
kuruka~92,5~~9,90
|
||||
no name~93,5~~9,91
|
||||
|
||||
dzud.rtags.me.long.can~94,5~~9,92~~~~~~~0F13
|
||||
dbu.khang.g-yon~208,1~~9,93~~~~~~~0F3C
|
||||
dbu.khang.g-yas~209,1~~9,94~~~~~~~0F3D
|
||||
gug.rtags.gyon~95,5~~9,95~~~~~~~0F3A
|
||||
gug.rtags.gyas~96,5~~9,96~~~~~~~0F3B
|
||||
yungs.drung (reversed)~97,5~~9,97
|
||||
yungs.drung (standard)~98,5~~9,98
|
||||
|
||||
|
@ -938,13 +958,10 @@ logo sign lhag.rtags~130,5~~10,35~~~~~~~0F16
|
|||
sgra.gcan.char.rtags~131,5~~10,36~~~~~~~0F17
|
||||
khyud.pa~132,5~~10,37~~~~~~~0F18
|
||||
sdong.tshugs~133,5~~10,38~~~~~~~0F19
|
||||
yar.tshes.rtags~134,5~~10,39~~~~~~~0F3E
|
||||
mar.tshes.rtags~135,5~~10,40~~~~~~~0F3F
|
||||
|
||||
rinchen shad~203,1~~1,103~~~~~~~0F11
|
||||
sbrul shad~204,1~~1,104~~~~~~~0F08
|
||||
gter tsheg~206,1~~1,105~~~~~~~0F14
|
||||
abbreviation sign~207,1~~1,106~~~~~~~0F34
|
||||
|
||||
utsama ka~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113
|
||||
utsama kha~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113
|
||||
|
@ -984,7 +1001,6 @@ nor.bu.bzhi.khyil~198,5~~10,70~~~~~~~0FCC
|
|||
small bindu~~~8,91~~~~~~~0F7E
|
||||
|
||||
bindu + datse~241,1~~8,94~~~~~~~0F83
|
||||
bindu + datse + thigle~242,1~~8,95~~~~~~~0F82
|
||||
bindu + kigu~243,1~~8,96
|
||||
bindu + short gigu~244,1~~8,97
|
||||
bindu + log yig gigu~245,1~~8,98
|
||||
|
@ -994,7 +1010,6 @@ bindu + double drengbu~248,1~~8,101
|
|||
bindu + normal naro~249,1~~8,102
|
||||
bindu + raised naro~250,1~~8,103
|
||||
bindu + double naro~251,1~~8,104
|
||||
virama~252,1~~8,105~~~~~~~0F84
|
||||
|
||||
zhu.yig.mgo.rgyan~33,5~~9,33~~~~~~~0F0A
|
||||
bka'.shog.mgo.rgyan~34,5~~9,34
|
||||
|
@ -1010,13 +1025,9 @@ Yig.mgo.tsheg.shad~39,5~~9,42~~~~~~~0F07
|
|||
shad + single tsheg~40,5~~9,43~~~~~~~0F0F
|
||||
shad (hooked) + single tsheg~41,5~~9,44
|
||||
shad + double tsheg~42,5~~9,46
|
||||
shad + single ornament~43,5~~9,47~~~~~~~0F10
|
||||
sbrul.shad + single ornament~44,5~~9,48
|
||||
sbrul.shad + double ornament~46,5~~9,49
|
||||
sbrul.shad variant form~47,5~~9,50
|
||||
rgya.gram.shad~48,5~~9,51~~~~~~~0F12
|
||||
|
||||
hard tsheg~205,1~~1,108~~~~~~~0F0C
|
||||
|
||||
reversed hu~147,5~~10,43
|
||||
Inverted ha~148,5~~10,44
|
||||
|
|
Loading…
Reference in a new issue