diff --git a/source/org/thdl/tib/text/DuffCode.java b/source/org/thdl/tib/text/DuffCode.java index bbc9a5d..cd3fb20 100644 --- a/source/org/thdl/tib/text/DuffCode.java +++ b/source/org/thdl/tib/text/DuffCode.java @@ -23,18 +23,13 @@ import java.util.StringTokenizer; import org.thdl.util.ThdlDebug; /** -* A wrapper for the primitive data types -* that combine to represent a Tibetan glyph in the -* TibetanMachineWeb family of fonts. +* An immutable representation of a Tibetan glyph in the +* TibetanMachineWeb or TibetanMachine families of fonts. * -* A DuffCode consists of a font number, a character, and -* a character number. A font identification and a character -* (or character number) are sufficient to uniquely identify -* any TibetanMachineWeb glyph. +* A DuffCode consists of a font number, a character, and a character +* number. A font identification and a character are sufficient to +* uniquely identify any TibetanMachineWeb or TibetanMachine glyph. * -* Note that DuffCodes are sometimes used, internally, to represent -* glyphs in other fonts, e.g. the TibetanMachine font. But mainly -* they represent TibetanMachineWeb glyphs. * @author Edward Garrett, Tibetan and Himalayan Digital Library * @version 1.0 */ @@ -154,10 +149,20 @@ public final class DuffCode { } /** -* @return a string representation of this object -*/ +* @return a string representation of this object */ public String toString() { - return ""; + } +/** + * @param TMW if this DuffCode represents a TMW glyph, not a TM glyph + * @return a string representation of this object */ + public String toString(boolean TMW) { + return ""; } diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index 5d4af05..221f01f 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -69,7 +69,8 @@ public class TibetanMachineWeb implements THDLWylieConstants { private static Map tibHash = new HashMap(); private static Map binduMap = new HashMap(); private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used - private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; + private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM + private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW private static String fileName = "tibwn.ini"; private static final String DELIMITER = "~"; private static Set top_vowels; @@ -354,15 +355,21 @@ public class TibetanMachineWeb implements THDLWylieConstants { ; else if (line.equals("")) //empty string ; - else if (!ignore) { + else { StringTokenizer st = new StringTokenizer(line,DELIMITER,true); - String wylie = new String(); - DuffCode[] duffCodes = new DuffCode[11]; + String wylie = null; + DuffCode[] duffCodes; + if (ignore) { + duffCodes = new DuffCode[TMW + 1]; + } else { + duffCodes = new DuffCode[11]; + } int k = 0; - while (st.hasMoreTokens()) { + while (st.hasMoreTokens() + && (!ignore || (k <= 3 /* 3 from 'case 3:' */))) { String val = st.nextToken(); if (val.equals(DELIMITER)) @@ -371,7 +378,9 @@ public class TibetanMachineWeb implements THDLWylieConstants { else if (!val.equals("")) { switch (k) { case 0: //wylie key - wylie = val; + if (!ignore) { + wylie = val; + } break; case 1: @@ -379,11 +388,13 @@ public class TibetanMachineWeb implements THDLWylieConstants { break; case 2: //reduced-size character if there is one - duffCodes[REDUCED_C] = new DuffCode(val,true); + if (!ignore) { + duffCodes[REDUCED_C] = new DuffCode(val,true); + } break; case 3: //TibetanMachineWeb code - duffCodes[k-1/* TMW */] = new DuffCode(val,true); + duffCodes[TMW] = new DuffCode(val,true); // TibetanMachineWeb7.91, for // example, has no TM(win32) // equivalent (though it has a @@ -391,39 +402,68 @@ public class TibetanMachineWeb implements THDLWylieConstants { // test for null here: if (null != duffCodes[TM]) { TMtoTMW[duffCodes[TM].getFontNum()-1][duffCodes[TM].getCharNum()-32] - = duffCodes[TMW]; + = duffCodes[TMW]; // TM->TMW mapping } + // but no null test is necessary + // here for either the TMW or the + // TM glyph (though the TM glyph + // could well be null): + TMWtoTM[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32] + = duffCodes[TM]; // TMW->TM mapping break; + // Vowels etc. to use with this glyph: case 4: case 5: case 6: case 7: case 8: case 9: - duffCodes[k-1] = new DuffCode(val,true); + if (!ignore) { + duffCodes[k-1] = new DuffCode(val,true); + } break; case 10: //Unicode: ignore for now + ThdlDebug.verify(val.length() == 4); + try { + int x; + ThdlDebug.verify((x = Integer.parseInt(val, 16)) >= 0x0F00 + && x <= 0x0FFF); + } catch (NumberFormatException e) { + ThdlDebug.verify(false); + } break; case 11: //half-height character if there is one - duffCodes[HALF_C] = new DuffCode(val,true); + if (!ignore) { + duffCodes[HALF_C] = new DuffCode(val,true); + } break; case 12: //special bindu-value if vowel+bindu are one glyph - DuffCode binduCode = new DuffCode(val,true); - binduMap.put(duffCodes[TMW],binduCode); + if (!ignore) { + DuffCode binduCode = new DuffCode(val,true); + binduMap.put(duffCodes[TMW],binduCode); + } break; } } } - if (hashOn) - tibHash.put(wylie,duffCodes); + if (!ignore) { + if (null == wylie) + throw new Error(fileName + + " has a line ^" + + DELIMITER + + " which means that no Wylie is assigned. That isn't supported."); + if (hashOn) { + tibHash.put(wylie, duffCodes); + } - int font = duffCodes[2].getFontNum(); - int code = duffCodes[2].getCharNum()-32; - toHashKey[font][code] = wylie; + int font = duffCodes[2].getFontNum(); + int code = duffCodes[2].getCharNum()-32; + toHashKey[font][code] = wylie; + } } } } @@ -811,7 +851,95 @@ public static DuffCode getHalfHeightGlyph(String hashKey) { return dc[REDUCED_C]; } +/** Returns the DuffCode for the TibetanMachineWeb glyph corresponding + to the given TibetanMachine font + (0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254). + + Null is never returned for an existing TibetanMachine glyph, + because every TibetanMachine glyph has a corresponding + TibetanMachineWeb glyph. But if (font, ord) doesn't correspond to + an existing TibetanMachine glyph, null is returned. In general, + though, this method may raise a runtime exception if you pass in a + (font, ord) that doesn't correspond to an existing TibetanMachine + glyph. */ +public static DuffCode mapTMtoTMW(int font, int ordinal) { + DuffCode ans = TMtoTMW[font][ordinal-32]; + // comment this out to test via main(..): + ThdlDebug.verify(null != ans); + return ans; +} + +/** Returns the DuffCode for the TibetanMachine glyph corresponding to + the given TibetanMachineWeb font + (0=TibetanMachineWeb,1=TibetanMachineWeb1,...) and character(32-127). + + Null is returned for an existing TibetanMachineWeb glyph only if + that glyph is TibetanMachineWeb7.91, because every other + TibetanMachineWeb glyph has a corresponding TibetanMachine glyph. + But if (font, ord) isn't (7, 91) and doesn't correspond to an + existing TibetanMachineWeb glyph, null is returned. In general, + though, this method may raise a runtime exception if you pass in a + (font, ord) that doesn't correspond to an existing + TibetanMachineWeb glyph. */ +public static DuffCode mapTMWtoTM(int font, int ordinal) { + DuffCode ans = TMWtoTM[font][ordinal-32]; + // comment this out to test via main(..): + ThdlDebug.verify(null != ans || (font == 7 && ordinal == 91)); + return ans; +} + +/** Tests the TMW->TM and TM->TMW mappings. */ +public static void main(String[] args) { + int font, ord, count; + + count = 0; + for (font = 0; font < 5; font++) { + for (ord = 32; ord < 255; ord++) { + if (mapTMtoTMW(font, ord) != null) { + count++; + } + } + System.out.println("Found " + count + " TM->TMW mappings (thus far)."); + } + + count = 0; + for (font = 0; font < 10; font++) { + for (ord = 32; ord < 127; ord++) { + if (mapTMWtoTM(font, ord) != null) { + count++; + } + } + System.out.println("Found " + count + " TMW->TM mappings (thus far)."); + } + + System.out.println("TMWtoTM: "); + for (font = 0; font < 10; font++) { + for (ord = 32; ord < 127; ord++) { + DuffCode dc; + if ((dc = mapTMWtoTM(font, ord)) != null) { + System.out.println(dc.getCharNum() + " " + + (dc.getFontNum()-1) + " " + + font + " " + + ord); + } + } + } + + System.out.println("TMtoTMW: (use sort -g -k 3 -k 4): "); + for (font = 0; font < 5; font++) { + for (ord = 32; ord < 255; ord++) { + DuffCode dc; + if ((dc = mapTMtoTMW(font, ord)) != null) { + System.out.println(ord + " " + font + " " + + (dc.getFontNum()-1) + " " + + dc.getCharNum()); + } + } + } +} + private static DuffCode getTMtoTMW(int font, int code) { + if (false) { // DLC FIXME: why was this here? if (code > 255-32) { switch (code) { case 8218-32: //sby @@ -842,6 +970,7 @@ private static DuffCode getTMtoTMW(int font, int code) { return null; } } + } return TMtoTMW[font][code]; } @@ -947,7 +1076,7 @@ public static String getWylieForGlyph(DuffCode dc) { // This error message is documented in // www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change them both // when you change this. - return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + dc + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; + return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + dc.toString(true) + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; } return wylieForGlyph(hashKey); } diff --git a/source/org/thdl/tib/text/tibwn.ini b/source/org/thdl/tib/text/tibwn.ini index d84f2b2..34398fb 100644 --- a/source/org/thdl/tib/text/tibwn.ini +++ b/source/org/thdl/tib/text/tibwn.ini @@ -22,6 +22,7 @@ a,i,u,e,o,I,U,ai,au,A,-i,-I _, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@# +// FIXME: add these etc.: M^,<,>,{,},[,],?,~ //_~32,1~0,32 @@ -42,6 +43,7 @@ $~38,5~~9,41~~~~~~~0F06 H~239,1~~8,92~~~~~~~0F7F M~~~8,91~~~~~~~0F7E `~241,1~~8,94~~~~~~~0F83 +// I thought EWTS said 0F83 was M^, not ` &~177,4~~8,93~~~~~~~0F85 @#~201,1~~9,40 @@ -688,25 +690,26 @@ _~32,1~~1,32 ~45,1~~1,45~~~~~~~0F0B _~32,1~~2,32 ~45,1~~2,45~~~~~~~0F0B -_~32,1~~3,32 - ~45,1~~3,45~~~~~~~0F0B -_~32,1~~4,32 - ~45,1~~4,45~~~~~~~0F0B -_~32,1~~5,32 - ~45,1~~5,45~~~~~~~0F0B -_~32,1~~6,32 - ~45,1~~6,45~~~~~~~0F0B -_~32,1~~7,32 - ~45,1~~7,45~~~~~~~0F0B -_~32,1~~8,32 - ~45,1~~8,45~~~~~~~0F0B -_~32,1~~9,32 - ~45,1~~9,45~~~~~~~0F0B -_~32,1~~10,32 - ~45,1~~10,45~~~~~~~0F0B +_~32,2~~3,32 + ~45,2~~3,45~~~~~~~0F0B +_~32,2~~4,32 + ~45,2~~4,45~~~~~~~0F0B +_~32,3~~5,32 + ~45,3~~5,45~~~~~~~0F0B +_~32,3~~6,32 + ~45,3~~6,45~~~~~~~0F0B +_~32,4~~7,32 + ~45,4~~7,45~~~~~~~0F0B +_~32,4~~8,32 + ~45,4~~8,45~~~~~~~0F0B +_~32,5~~9,32 + ~45,5~~9,45~~~~~~~0F0B +_~32,5~~10,32 + ~45,5~~10,45~~~~~~~0F0B //bindus `~241,1~~8,94~~~~~~~0F83 +// I thought EWTS said 0F83 was M^, not ` iM~243,1~~8,96 iM~244,1~~8,97 -iM~245,1~~8,98 @@ -921,12 +924,12 @@ vhite and black pebble~119,5~~9,119~~~~~~~0F1F triple vhite pebble~120,5~~9,120~~~~~~~0F1C triple black pebble~121,5~~9,121~~~~~~~0FCF -122,5~~9,122 -123,5~~9,123 -124,5~~9,124 -125,5~~9,125 -126,5~~9,126 -128,5~~10,33 +~122,5~~9,122 +~123,5~~9,123 +~124,5~~9,124 +~125,5~~9,125 +~126,5~~9,126 +~128,5~~10,33 logo sign chad.rtags~129,5~~10,34~~~~~~~0F15 logo sign lhag.rtags~130,5~~10,35~~~~~~~0F16 @@ -994,7 +997,7 @@ zhu.yig.mgo.rgyan~33,5~~9,33~~~~~~~0F0A bka'.shog.mgo.rgyan~34,5~~9,34 mnyam.yig.mgo.rgyan~35,5~~9,35 mnyam.yig.mgo.rgyan~36,5~~9,36~~~~~~~0F09 -37,5~~9,37 +~37,5~~9,37 zla tse gcig~210,1~~9,38~~~~~~~0F04 half zla tse gcig~200,1~~9,39~~~~~~~0F05 // zla tse gnyis~201,1~~9,40 is now punctuation.