From bfacd6c9983a73be82440ab2067841296a14c823 Mon Sep 17 00:00:00 2001 From: dchandler Date: Sat, 31 May 2003 20:13:15 +0000 Subject: [PATCH] Accurate TM->TMW and TMW->TM mappings are now available. I've verified this extensively and have full confidence that these mappings agree with Tony Duff's Tibetan! 5.1 documentation (except as described below). To get them, I had to disregard Tony Duff's tables for a few glyphs: the characters with ordinal 32 and 45 (space and hyphen in Roman ASCII, space and tsheg in Tibetan). For these glyphs, we must have mappings from TibetanMachineSkt4.32 to something, etc., and those mappings were not present. I've normalized the mapping for these glyphs, as it is arbitrary because the same two glyphs just appear fifteen times each. --- source/org/thdl/tib/text/DuffCode.java | 31 ++-- .../org/thdl/tib/text/TibetanMachineWeb.java | 167 ++++++++++++++++-- source/org/thdl/tib/text/tibwn.ini | 49 ++--- 3 files changed, 192 insertions(+), 55 deletions(-) diff --git a/source/org/thdl/tib/text/DuffCode.java b/source/org/thdl/tib/text/DuffCode.java index bbc9a5d..cd3fb20 100644 --- a/source/org/thdl/tib/text/DuffCode.java +++ b/source/org/thdl/tib/text/DuffCode.java @@ -23,18 +23,13 @@ import java.util.StringTokenizer; import org.thdl.util.ThdlDebug; /** -* A wrapper for the primitive data types -* that combine to represent a Tibetan glyph in the -* TibetanMachineWeb family of fonts. +* An immutable representation of a Tibetan glyph in the +* TibetanMachineWeb or TibetanMachine families of fonts. * -* A DuffCode consists of a font number, a character, and -* a character number. A font identification and a character -* (or character number) are sufficient to uniquely identify -* any TibetanMachineWeb glyph. +* A DuffCode consists of a font number, a character, and a character +* number. A font identification and a character are sufficient to +* uniquely identify any TibetanMachineWeb or TibetanMachine glyph. 
* -* Note that DuffCodes are sometimes used, internally, to represent -* glyphs in other fonts, e.g. the TibetanMachine font. But mainly -* they represent TibetanMachineWeb glyphs. * @author Edward Garrett, Tibetan and Himalayan Digital Library * @version 1.0 */ @@ -154,10 +149,20 @@ public final class DuffCode { } /** -* @return a string representation of this object -*/ +* @return a string representation of this object */ public String toString() { - return ""; + } +/** + * @param TMW if this DuffCode represents a TMW glyph, not a TM glyph + * @return a string representation of this object */ + public String toString(boolean TMW) { + return ""; } diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index 5d4af05..221f01f 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -69,7 +69,8 @@ public class TibetanMachineWeb implements THDLWylieConstants { private static Map tibHash = new HashMap(); private static Map binduMap = new HashMap(); private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] 
is not used - private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; + private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM + private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW private static String fileName = "tibwn.ini"; private static final String DELIMITER = "~"; private static Set top_vowels; @@ -354,15 +355,21 @@ public class TibetanMachineWeb implements THDLWylieConstants { ; else if (line.equals("")) //empty string ; - else if (!ignore) { + else { StringTokenizer st = new StringTokenizer(line,DELIMITER,true); - String wylie = new String(); - DuffCode[] duffCodes = new DuffCode[11]; + String wylie = null; + DuffCode[] duffCodes; + if (ignore) { + duffCodes = new DuffCode[TMW + 1]; + } else { + duffCodes = new DuffCode[11]; + } int k = 0; - while (st.hasMoreTokens()) { + while (st.hasMoreTokens() + && (!ignore || (k <= 3 /* 3 from 'case 3:' */))) { String val = st.nextToken(); if (val.equals(DELIMITER)) @@ -371,7 +378,9 @@ public class TibetanMachineWeb implements THDLWylieConstants { else if (!val.equals("")) { switch (k) { case 0: //wylie key - wylie = val; + if (!ignore) { + wylie = val; + } break; case 1: @@ -379,11 +388,13 @@ public class TibetanMachineWeb implements THDLWylieConstants { break; case 2: //reduced-size character if there is one - duffCodes[REDUCED_C] = new DuffCode(val,true); + if (!ignore) { + duffCodes[REDUCED_C] = new DuffCode(val,true); + } break; case 3: //TibetanMachineWeb code - duffCodes[k-1/* TMW */] = new DuffCode(val,true); + duffCodes[TMW] = new DuffCode(val,true); // TibetanMachineWeb7.91, for // example, has no TM(win32) // equivalent (though it has a @@ -391,39 +402,68 @@ public class TibetanMachineWeb implements THDLWylieConstants { // test for null here: if (null != duffCodes[TM]) { TMtoTMW[duffCodes[TM].getFontNum()-1][duffCodes[TM].getCharNum()-32] - = duffCodes[TMW]; + = duffCodes[TMW]; // TM->TMW mapping } + // but no 
null test is necessary + // here for either the TMW or the + // TM glyph (though the TM glyph + // could well be null): + TMWtoTM[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32] + = duffCodes[TM]; // TMW->TM mapping break; + // Vowels etc. to use with this glyph: case 4: case 5: case 6: case 7: case 8: case 9: - duffCodes[k-1] = new DuffCode(val,true); + if (!ignore) { + duffCodes[k-1] = new DuffCode(val,true); + } break; case 10: //Unicode: ignore for now + ThdlDebug.verify(val.length() == 4); + try { + int x; + ThdlDebug.verify((x = Integer.parseInt(val, 16)) >= 0x0F00 + && x <= 0x0FFF); + } catch (NumberFormatException e) { + ThdlDebug.verify(false); + } break; case 11: //half-height character if there is one - duffCodes[HALF_C] = new DuffCode(val,true); + if (!ignore) { + duffCodes[HALF_C] = new DuffCode(val,true); + } break; case 12: //special bindu-value if vowel+bindu are one glyph - DuffCode binduCode = new DuffCode(val,true); - binduMap.put(duffCodes[TMW],binduCode); + if (!ignore) { + DuffCode binduCode = new DuffCode(val,true); + binduMap.put(duffCodes[TMW],binduCode); + } break; } } } - if (hashOn) - tibHash.put(wylie,duffCodes); + if (!ignore) { + if (null == wylie) + throw new Error(fileName + + " has a line ^" + + DELIMITER + + " which means that no Wylie is assigned. That isn't supported."); + if (hashOn) { + tibHash.put(wylie, duffCodes); + } - int font = duffCodes[2].getFontNum(); - int code = duffCodes[2].getCharNum()-32; - toHashKey[font][code] = wylie; + int font = duffCodes[2].getFontNum(); + int code = duffCodes[2].getCharNum()-32; + toHashKey[font][code] = wylie; + } } } } @@ -811,7 +851,95 @@ public static DuffCode getHalfHeightGlyph(String hashKey) { return dc[REDUCED_C]; } +/** Returns the DuffCode for the TibetanMachineWeb glyph corresponding + to the given TibetanMachine font + (0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254). 
+ + Null is never returned for an existing TibetanMachine glyph, + because every TibetanMachine glyph has a corresponding + TibetanMachineWeb glyph. But if (font, ord) doesn't correspond to + an existing TibetanMachine glyph, null is returned. In general, + though, this method may raise a runtime exception if you pass in a + (font, ord) that doesn't correspond to an existing TibetanMachine + glyph. */ +public static DuffCode mapTMtoTMW(int font, int ordinal) { + DuffCode ans = TMtoTMW[font][ordinal-32]; + // comment this out to test via main(..): + ThdlDebug.verify(null != ans); + return ans; +} + +/** Returns the DuffCode for the TibetanMachine glyph corresponding to + the given TibetanMachineWeb font + (0=TibetanMachineWeb,1=TibetanMachineWeb1,...) and character(32-126). + + Null is returned for an existing TibetanMachineWeb glyph only if + that glyph is TibetanMachineWeb7.91, because every other + TibetanMachineWeb glyph has a corresponding TibetanMachine glyph. + But if (font, ord) isn't (7, 91) and doesn't correspond to an + existing TibetanMachineWeb glyph, null is returned. In general, + though, this method may raise a runtime exception if you pass in a + (font, ord) that doesn't correspond to an existing + TibetanMachineWeb glyph. */ +public static DuffCode mapTMWtoTM(int font, int ordinal) { + DuffCode ans = TMWtoTM[font][ordinal-32]; + // comment this out to test via main(..): + ThdlDebug.verify(null != ans || (font == 7 && ordinal == 91)); + return ans; +} + +/** Tests the TMW->TM and TM->TMW mappings. 
*/ +public static void main(String[] args) { + int font, ord, count; + + count = 0; + for (font = 0; font < 5; font++) { + for (ord = 32; ord < 255; ord++) { + if (mapTMtoTMW(font, ord) != null) { + count++; + } + } + System.out.println("Found " + count + " TM->TMW mappings (thus far)."); + } + + count = 0; + for (font = 0; font < 10; font++) { + for (ord = 32; ord < 127; ord++) { + if (mapTMWtoTM(font, ord) != null) { + count++; + } + } + System.out.println("Found " + count + " TMW->TM mappings (thus far)."); + } + + System.out.println("TMWtoTM: "); + for (font = 0; font < 10; font++) { + for (ord = 32; ord < 127; ord++) { + DuffCode dc; + if ((dc = mapTMWtoTM(font, ord)) != null) { + System.out.println(dc.getCharNum() + " " + + (dc.getFontNum()-1) + " " + + font + " " + + ord); + } + } + } + + System.out.println("TMtoTMW: (use sort -g -k 3 -k 4): "); + for (font = 0; font < 5; font++) { + for (ord = 32; ord < 255; ord++) { + DuffCode dc; + if ((dc = mapTMtoTMW(font, ord)) != null) { + System.out.println(ord + " " + font + " " + + (dc.getFontNum()-1) + " " + + dc.getCharNum()); + } + } + } +} + private static DuffCode getTMtoTMW(int font, int code) { + if (false) { // DLC FIXME: why was this here? if (code > 255-32) { switch (code) { case 8218-32: //sby @@ -842,6 +970,7 @@ private static DuffCode getTMtoTMW(int font, int code) { return null; } } + } return TMtoTMW[font][code]; } @@ -947,7 +1076,7 @@ public static String getWylieForGlyph(DuffCode dc) { // This error message is documented in // www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change them both // when you change this. - return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + dc + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; + return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + dc.toString(true) + " to THDL Extended Wylie. 
Please see the documentation for the TMW font and transcribe this yourself.]]>>"; } return wylieForGlyph(hashKey); } diff --git a/source/org/thdl/tib/text/tibwn.ini b/source/org/thdl/tib/text/tibwn.ini index d84f2b2..34398fb 100644 --- a/source/org/thdl/tib/text/tibwn.ini +++ b/source/org/thdl/tib/text/tibwn.ini @@ -22,6 +22,7 @@ a,i,u,e,o,I,U,ai,au,A,-i,-I _, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@# +// FIXME: add these etc.: M^,<,>,{,},[,],?,~ //_~32,1~0,32 @@ -42,6 +43,7 @@ $~38,5~~9,41~~~~~~~0F06 H~239,1~~8,92~~~~~~~0F7F M~~~8,91~~~~~~~0F7E `~241,1~~8,94~~~~~~~0F83 +// I thought EWTS said 0F83 was M^, not ` &~177,4~~8,93~~~~~~~0F85 @#~201,1~~9,40 @@ -688,25 +690,26 @@ _~32,1~~1,32 ~45,1~~1,45~~~~~~~0F0B _~32,1~~2,32 ~45,1~~2,45~~~~~~~0F0B -_~32,1~~3,32 - ~45,1~~3,45~~~~~~~0F0B -_~32,1~~4,32 - ~45,1~~4,45~~~~~~~0F0B -_~32,1~~5,32 - ~45,1~~5,45~~~~~~~0F0B -_~32,1~~6,32 - ~45,1~~6,45~~~~~~~0F0B -_~32,1~~7,32 - ~45,1~~7,45~~~~~~~0F0B -_~32,1~~8,32 - ~45,1~~8,45~~~~~~~0F0B -_~32,1~~9,32 - ~45,1~~9,45~~~~~~~0F0B -_~32,1~~10,32 - ~45,1~~10,45~~~~~~~0F0B +_~32,2~~3,32 + ~45,2~~3,45~~~~~~~0F0B +_~32,2~~4,32 + ~45,2~~4,45~~~~~~~0F0B +_~32,3~~5,32 + ~45,3~~5,45~~~~~~~0F0B +_~32,3~~6,32 + ~45,3~~6,45~~~~~~~0F0B +_~32,4~~7,32 + ~45,4~~7,45~~~~~~~0F0B +_~32,4~~8,32 + ~45,4~~8,45~~~~~~~0F0B +_~32,5~~9,32 + ~45,5~~9,45~~~~~~~0F0B +_~32,5~~10,32 + ~45,5~~10,45~~~~~~~0F0B //bindus `~241,1~~8,94~~~~~~~0F83 +// I thought EWTS said 0F83 was M^, not ` iM~243,1~~8,96 iM~244,1~~8,97 -iM~245,1~~8,98 @@ -921,12 +924,12 @@ vhite and black pebble~119,5~~9,119~~~~~~~0F1F triple vhite pebble~120,5~~9,120~~~~~~~0F1C triple black pebble~121,5~~9,121~~~~~~~0FCF -122,5~~9,122 -123,5~~9,123 -124,5~~9,124 -125,5~~9,125 -126,5~~9,126 -128,5~~10,33 +~122,5~~9,122 +~123,5~~9,123 +~124,5~~9,124 +~125,5~~9,125 +~126,5~~9,126 +~128,5~~10,33 logo sign chad.rtags~129,5~~10,34~~~~~~~0F15 logo sign lhag.rtags~130,5~~10,35~~~~~~~0F16 @@ -994,7 +997,7 @@ zhu.yig.mgo.rgyan~33,5~~9,33~~~~~~~0F0A 
bka'.shog.mgo.rgyan~34,5~~9,34 mnyam.yig.mgo.rgyan~35,5~~9,35 mnyam.yig.mgo.rgyan~36,5~~9,36~~~~~~~0F09 -37,5~~9,37 +~37,5~~9,37 zla tse gcig~210,1~~9,38~~~~~~~0F04 half zla tse gcig~200,1~~9,39~~~~~~~0F05 // zla tse gnyis~201,1~~9,40 is now punctuation.