diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index 00fd72e..de5fcdd 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -233,6 +233,23 @@ public class TibetanMachineWeb implements THDLWylieConstants { } } + /** Returns the next token in st with the first occurrence of + __TILDE__ replaced with ~. Needed because the DELIMITER is ~. + Appends the escaped token to sb iff an escape sequence + occurred. */ + private static String getEscapedToken(StringTokenizer st, + StringBuffer sb) { + String unescaped = st.nextToken(); + int start; + if ((start = unescaped.indexOf("__TILDE__")) >= 0) { + StringBuffer x = new StringBuffer(unescaped); + x.replace(start, "__TILDE__".length(), "~"); + sb.append(x.toString()); + return x.toString(); + } else { + return unescaped; + } + } /** * This method reads the data file ("tibwn.ini"), constructs * the character, punctuation, and vowel lists, as well as @@ -379,14 +396,21 @@ public class TibetanMachineWeb implements THDLWylieConstants { int k = 0; + StringBuffer escapedToken = new StringBuffer(""); + ThdlDebug.verify(escapedToken.length() == 0); while (st.hasMoreTokens() && (!ignore || (k <= 3 /* 3 from 'case 3:' */))) { - String val = st.nextToken(); + String val = getEscapedToken(st, escapedToken); - if (val.equals(DELIMITER)) + if (val.equals(DELIMITER) + && escapedToken.length() == 0) { k++; + } else if (!val.equals("")) { + if (escapedToken.length() != 0) { + escapedToken = new StringBuffer(""); + ThdlDebug.verify(escapedToken.length() == 0); + } - else if (!val.equals("")) { switch (k) { case 0: //wylie key if (!ignore) { @@ -476,8 +500,11 @@ public class TibetanMachineWeb implements THDLWylieConstants { tibHash.put(wylie, duffCodes); } - int font = duffCodes[2].getFontNum(); - int code = duffCodes[2].getCharNum()-32; + if (null == duffCodes[TMW]) + throw new Error(fileName + + " has a line with wylie " + wylie + " but no TMW; that's not allowed"); + int font = duffCodes[TMW].getFontNum(); + int code = duffCodes[TMW].getCharNum()-32; toHashKey[font][code] = wylie; } } @@ -863,9 +890,8 @@ public static boolean hasGlyph(String hashKey) { */ public static DuffCode getGlyph(String hashKey) { DuffCode[] dc = (DuffCode[])tibHash.get(hashKey); - // If dc is null, then likely you misconfigured tibwn.ini such - // that, say, M is expected (i.e., it is listed as, - // e.g. punctuation), but no 'M~...' line appears. + if (null == dc) + throw new Error("It is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears."); return dc[TMW]; } diff --git a/source/org/thdl/tib/text/tibwn.ini b/source/org/thdl/tib/text/tibwn.ini index f8d9af2..b735013 100644 --- a/source/org/thdl/tib/text/tibwn.ini +++ b/source/org/thdl/tib/text/tibwn.ini @@ -21,13 +21,13 @@ k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N a,i,u,e,o,I,U,ai,au,A,-i,-I -_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@# -// FIXME: add these etc.: M^,<,>,{,},[,],?,~ +_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#,?,=,[,],<,>,{,},* +// FIXME: add these etc.: M^,~,~^ //_~32,1~0,32 // ~45,1~0,45~~~~~~~0F0B -_~32,1~~1,32 +_~32,1~~1,32~~~~~~~0020 ~45,1~~1,45~~~~~~~0F0B /~202,1~~1,107~~~~~~~0F0D |~203,1~~1,103~~~~~~~0F11 @@ -42,10 +42,29 @@ $~38,5~~9,41~~~~~~~0F06 )~209,1~~9,94~~~~~~~0F3D H~239,1~~8,92~~~~~~~0F7F M~238,1~~8,90~~~~~~~0F7E -`~241,1~~8,94~~~~~~~0F83 -// I thought EWTS said 0F83 was M^, not ` &~177,4~~8,93~~~~~~~0F85 @#~201,1~~9,40~~~~~~~0F04,0F05 +// abbreviation sign: +=~207,1~~1,106~~~~~~~0F34 +// virama: +?~252,1~~8,105~~~~~~~0F84 +// rgya.gram.shad: +]~48,5~~9,51~~~~~~~0F12 +// shad + single ornament: +[~43,5~~9,47~~~~~~~0F10 +// gug.rtags.gyon: +<~95,5~~9,95~~~~~~~0F3A +// gug.rtags.gyas: +>~96,5~~9,96~~~~~~~0F3B +// yar.tshes.rtags: +{~134,5~~10,39~~~~~~~0F3E +// mar.tshes.rtags +}~135,5~~10,40~~~~~~~0F3F +// dzud.rtags.me.long.can: +`~94,5~~9,92~~~~~~~0F13 +// hard tsheg: +*~205,1~~1,108~~~~~~~0F0C + // FIXME: 0F5F,0F39 might work, but the OpenType font's author must've had Dza @@ -687,26 +706,30 @@ a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114 9+1/2~79,5~~9,81~~~~~~~0F32 +// FIXME: make this punctuation: +M^~241,1~~8,94~~~~~~~0F83 +// bindu + datse + thigle (FIXME: make this punctuation): +__TILDE__^~242,1~~8,95~~~~~~~0F82 //punctuation -_~32,1~~1,32 +_~32,1~~1,32~~~~~~~0020 ~45,1~~1,45~~~~~~~0F0B -_~32,1~~2,32 +_~32,1~~2,32~~~~~~~0020 ~45,1~~2,45~~~~~~~0F0B -_~32,2~~3,32 +_~32,2~~3,32~~~~~~~0020 ~45,2~~3,45~~~~~~~0F0B -_~32,2~~4,32 +_~32,2~~4,32~~~~~~~0020 ~45,2~~4,45~~~~~~~0F0B -_~32,3~~5,32 +_~32,3~~5,32~~~~~~~0020 ~45,3~~5,45~~~~~~~0F0B -_~32,3~~6,32 +_~32,3~~6,32~~~~~~~0020 ~45,3~~6,45~~~~~~~0F0B -_~32,4~~7,32 +_~32,4~~7,32~~~~~~~0020 ~45,4~~7,45~~~~~~~0F0B -_~32,4~~8,32 +_~32,4~~8,32~~~~~~~0020 ~45,4~~8,45~~~~~~~0F0B -_~32,5~~9,32 +_~32,5~~9,32~~~~~~~0020 ~45,5~~9,45~~~~~~~0F0B -_~32,5~~10,32 +_~32,5~~10,32~~~~~~~0020 ~45,5~~10,45~~~~~~~0F0B //bindus @@ -895,11 +918,8 @@ nyi.zla~91,5~~9,89 kuruka~92,5~~9,90 no name~93,5~~9,91 -dzud.rtags.me.long.can~94,5~~9,92~~~~~~~0F13 dbu.khang.g-yon~208,1~~9,93~~~~~~~0F3C dbu.khang.g-yas~209,1~~9,94~~~~~~~0F3D -gug.rtags.gyon~95,5~~9,95~~~~~~~0F3A -gug.rtags.gyas~96,5~~9,96~~~~~~~0F3B yungs.drung (reversed)~97,5~~9,97 yungs.drung (standard)~98,5~~9,98 @@ -938,13 +958,10 @@ logo sign lhag.rtags~130,5~~10,35~~~~~~~0F16 sgra.gcan.char.rtags~131,5~~10,36~~~~~~~0F17 khyud.pa~132,5~~10,37~~~~~~~0F18 sdong.tshugs~133,5~~10,38~~~~~~~0F19 -yar.tshes.rtags~134,5~~10,39~~~~~~~0F3E -mar.tshes.rtags~135,5~~10,40~~~~~~~0F3F rinchen shad~203,1~~1,103~~~~~~~0F11 sbrul shad~204,1~~1,104~~~~~~~0F08 gter tsheg~206,1~~1,105~~~~~~~0F14 -abbreviation sign~207,1~~1,106~~~~~~~0F34 utsama ka~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113 utsama kha~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113 @@ -984,7 +1001,6 @@ nor.bu.bzhi.khyil~198,5~~10,70~~~~~~~0FCC small bindu~~~8,91~~~~~~~0F7E bindu + datse~241,1~~8,94~~~~~~~0F83 -bindu + datse + thigle~242,1~~8,95~~~~~~~0F82 bindu + kigu~243,1~~8,96 bindu + short gigu~244,1~~8,97 bindu + log yig gigu~245,1~~8,98 @@ -994,7 +1010,6 @@ bindu + double drengbu~248,1~~8,101 bindu + normal naro~249,1~~8,102 bindu + raised naro~250,1~~8,103 bindu + double naro~251,1~~8,104 -virama~252,1~~8,105~~~~~~~0F84 zhu.yig.mgo.rgyan~33,5~~9,33~~~~~~~0F0A bka'.shog.mgo.rgyan~34,5~~9,34 @@ -1010,13 +1025,9 @@ Yig.mgo.tsheg.shad~39,5~~9,42~~~~~~~0F07 shad + single tsheg~40,5~~9,43~~~~~~~0F0F shad (hooked) + single tsheg~41,5~~9,44 shad + double tsheg~42,5~~9,46 -shad + single ornament~43,5~~9,47~~~~~~~0F10 sbrul.shad + single ornament~44,5~~9,48 sbrul.shad + double ornament~46,5~~9,49 sbrul.shad variant form~47,5~~9,50 -rgya.gram.shad~48,5~~9,51~~~~~~~0F12 - -hard tsheg~205,1~~1,108~~~~~~~0F0C reversed hu~147,5~~10,43 Inverted ha~148,5~~10,44