Accurate TM->TMW and TMW->TM mappings are now available.

I've verified this extensively and have full confidence that these mappings
agree with Tony Duff's Tibetan! 5.1 documentation (except as described
below).

To get them, I had to disregard Tony Duff's tables for two glyphs: the
characters with ordinals 32 and 45 (space and hyphen in Roman ASCII,
space and tsheg in Tibetan).  For these glyphs, we must have mappings
from TibetanMachineSkt4.32 to something, etc., and those mappings were
not present.  I've normalized the mappings for these glyphs; the choice is
arbitrary because the same two glyphs simply appear fifteen times each.
This commit is contained in:
dchandler 2003-05-31 20:13:15 +00:00
parent a4bc23a9ab
commit bfacd6c998
3 changed files with 192 additions and 55 deletions

View file

@ -23,18 +23,13 @@ import java.util.StringTokenizer;
import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlDebug;
/** /**
* A wrapper for the primitive data types * An immutable representation of a Tibetan glyph in the
* that combine to represent a Tibetan glyph in the * TibetanMachineWeb or TibetanMachine families of fonts.
* TibetanMachineWeb family of fonts.
* *
* A DuffCode consists of a font number, a character, and * A DuffCode consists of a font number, a character, and a character
* a character number. A font identification and a character * number. A font identification and a character are sufficient to
* (or character number) are sufficient to uniquely identify * uniquely identify any TibetanMachineWeb or TibetanMachine glyph.
* any TibetanMachineWeb glyph.
* *
* Note that DuffCodes are sometimes used, internally, to represent
* glyphs in other fonts, e.g. the TibetanMachine font. But mainly
* they represent TibetanMachineWeb glyphs.
* @author Edward Garrett, Tibetan and Himalayan Digital Library * @author Edward Garrett, Tibetan and Himalayan Digital Library
* @version 1.0 */ * @version 1.0 */
@ -154,10 +149,20 @@ public final class DuffCode {
} }
/** /**
* @return a string representation of this object * @return a string representation of this object */
*/
public String toString() { public String toString() {
return "<duffcode font=" + TibetanMachineWeb.tmwFontNames[fontNum] return "<duffcode font=" + fontNum
+ " charNum=" + charNum + " character="
+ new Character(getCharacter()).toString() + "/>";
}
/**
* @param TMW if this DuffCode represents a TMW glyph, not a TM glyph
* @return a string representation of this object */
public String toString(boolean TMW) {
return "<duffcode font="
+ (TMW
? TibetanMachineWeb.tmwFontNames
: TibetanMachineWeb.tmFontNames)[fontNum]
+ " charNum=" + charNum + " character=" + " charNum=" + charNum + " character="
+ new Character(getCharacter()).toString() + "/>"; + new Character(getCharacter()).toString() + "/>";
} }

View file

@ -69,7 +69,8 @@ public class TibetanMachineWeb implements THDLWylieConstants {
private static Map tibHash = new HashMap(); private static Map tibHash = new HashMap();
private static Map binduMap = new HashMap(); private static Map binduMap = new HashMap();
private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used
private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM
private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW
private static String fileName = "tibwn.ini"; private static String fileName = "tibwn.ini";
private static final String DELIMITER = "~"; private static final String DELIMITER = "~";
private static Set top_vowels; private static Set top_vowels;
@ -354,15 +355,21 @@ public class TibetanMachineWeb implements THDLWylieConstants {
; ;
else if (line.equals("")) //empty string else if (line.equals("")) //empty string
; ;
else if (!ignore) { else {
StringTokenizer st = new StringTokenizer(line,DELIMITER,true); StringTokenizer st = new StringTokenizer(line,DELIMITER,true);
String wylie = new String(); String wylie = null;
DuffCode[] duffCodes = new DuffCode[11]; DuffCode[] duffCodes;
if (ignore) {
duffCodes = new DuffCode[TMW + 1];
} else {
duffCodes = new DuffCode[11];
}
int k = 0; int k = 0;
while (st.hasMoreTokens()) { while (st.hasMoreTokens()
&& (!ignore || (k <= 3 /* 3 from 'case 3:' */))) {
String val = st.nextToken(); String val = st.nextToken();
if (val.equals(DELIMITER)) if (val.equals(DELIMITER))
@ -371,7 +378,9 @@ public class TibetanMachineWeb implements THDLWylieConstants {
else if (!val.equals("")) { else if (!val.equals("")) {
switch (k) { switch (k) {
case 0: //wylie key case 0: //wylie key
if (!ignore) {
wylie = val; wylie = val;
}
break; break;
case 1: case 1:
@ -379,11 +388,13 @@ public class TibetanMachineWeb implements THDLWylieConstants {
break; break;
case 2: //reduced-size character if there is one case 2: //reduced-size character if there is one
if (!ignore) {
duffCodes[REDUCED_C] = new DuffCode(val,true); duffCodes[REDUCED_C] = new DuffCode(val,true);
}
break; break;
case 3: //TibetanMachineWeb code case 3: //TibetanMachineWeb code
duffCodes[k-1/* TMW */] = new DuffCode(val,true); duffCodes[TMW] = new DuffCode(val,true);
// TibetanMachineWeb7.91, for // TibetanMachineWeb7.91, for
// example, has no TM(win32) // example, has no TM(win32)
// equivalent (though it has a // equivalent (though it has a
@ -391,35 +402,63 @@ public class TibetanMachineWeb implements THDLWylieConstants {
// test for null here: // test for null here:
if (null != duffCodes[TM]) { if (null != duffCodes[TM]) {
TMtoTMW[duffCodes[TM].getFontNum()-1][duffCodes[TM].getCharNum()-32] TMtoTMW[duffCodes[TM].getFontNum()-1][duffCodes[TM].getCharNum()-32]
= duffCodes[TMW]; = duffCodes[TMW]; // TM->TMW mapping
} }
// but no null test is necessary
// here for either the TMW or the
// TM glyph (though the TM glyph
// could well be null):
TMWtoTM[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32]
= duffCodes[TM]; // TMW->TM mapping
break; break;
// Vowels etc. to use with this glyph:
case 4: case 4:
case 5: case 5:
case 6: case 6:
case 7: case 7:
case 8: case 8:
case 9: case 9:
if (!ignore) {
duffCodes[k-1] = new DuffCode(val,true); duffCodes[k-1] = new DuffCode(val,true);
}
break; break;
case 10: //Unicode: ignore for now case 10: //Unicode: ignore for now
ThdlDebug.verify(val.length() == 4);
try {
int x;
ThdlDebug.verify((x = Integer.parseInt(val, 16)) >= 0x0F00
&& x <= 0x0FFF);
} catch (NumberFormatException e) {
ThdlDebug.verify(false);
}
break; break;
case 11: //half-height character if there is one case 11: //half-height character if there is one
if (!ignore) {
duffCodes[HALF_C] = new DuffCode(val,true); duffCodes[HALF_C] = new DuffCode(val,true);
}
break; break;
case 12: //special bindu-value if vowel+bindu are one glyph case 12: //special bindu-value if vowel+bindu are one glyph
if (!ignore) {
DuffCode binduCode = new DuffCode(val,true); DuffCode binduCode = new DuffCode(val,true);
binduMap.put(duffCodes[TMW],binduCode); binduMap.put(duffCodes[TMW],binduCode);
}
break; break;
} }
} }
} }
if (hashOn) if (!ignore) {
tibHash.put(wylie,duffCodes); if (null == wylie)
throw new Error(fileName
+ " has a line ^"
+ DELIMITER
+ " which means that no Wylie is assigned. That isn't supported.");
if (hashOn) {
tibHash.put(wylie, duffCodes);
}
int font = duffCodes[2].getFontNum(); int font = duffCodes[2].getFontNum();
int code = duffCodes[2].getCharNum()-32; int code = duffCodes[2].getCharNum()-32;
@ -427,6 +466,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
} }
} }
} }
}
catch (IOException e) { catch (IOException e) {
System.out.println("file Disappeared"); System.out.println("file Disappeared");
ThdlDebug.noteIffyCode(); ThdlDebug.noteIffyCode();
@ -811,7 +851,95 @@ public static DuffCode getHalfHeightGlyph(String hashKey) {
return dc[REDUCED_C]; return dc[REDUCED_C];
} }
/** Returns the DuffCode for the TibetanMachineWeb glyph corresponding
to the given TibetanMachine font
(0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254).
Null is never returned for an existing TibetanMachine glyph,
because every TibetanMachine glyph has a corresponding
TibetanMachineWeb glyph. But if (font, ord) doesn't correspond to
an existing TibetanMachine glyph, null is returned. In general,
though, this method may raise a runtime exception if you pass in a
(font, ord) that doesn't correspond to an existing TibetanMachine
glyph. */
public static DuffCode mapTMtoTMW(int font, int ordinal) {
DuffCode ans = TMtoTMW[font][ordinal-32];
// comment this out to test via main(..):
ThdlDebug.verify(null != ans);
return ans;
}
/** Returns the DuffCode for the TibetanMachine glyph corresponding to
the given TibetanMachineWeb font
(0=TibetanMachineWeb,1=TibetanMachineWeb1,...) and character(32-127).
Null is returned for an existing TibetanMachineWeb glyph only if
that glyph is TibetanMachineWeb7.91, because every other
TibetanMachineWeb glyph has a corresponding TibetanMachine glyph.
But if (font, ord) isn't (7, 91) and doesn't correspond to an
existing TibetanMachineWeb glyph, null is returned. In general,
though, this method may raise a runtime exception if you pass in a
(font, ord) that doesn't correspond to an existing
TibetanMachineWeb glyph. */
public static DuffCode mapTMWtoTM(int font, int ordinal) {
DuffCode ans = TMWtoTM[font][ordinal-32];
// comment this out to test via main(..):
ThdlDebug.verify(null != ans || (font == 7 && ordinal == 91));
return ans;
}
/** Tests the TMW-&gt;TM and TM-&gt;TMW mappings. */
public static void main(String[] args) {
int font, ord, count;
count = 0;
for (font = 0; font < 5; font++) {
for (ord = 32; ord < 255; ord++) {
if (mapTMtoTMW(font, ord) != null) {
count++;
}
}
System.out.println("Found " + count + " TM->TMW mappings (thus far).");
}
count = 0;
for (font = 0; font < 10; font++) {
for (ord = 32; ord < 127; ord++) {
if (mapTMWtoTM(font, ord) != null) {
count++;
}
}
System.out.println("Found " + count + " TMW->TM mappings (thus far).");
}
System.out.println("TMWtoTM: ");
for (font = 0; font < 10; font++) {
for (ord = 32; ord < 127; ord++) {
DuffCode dc;
if ((dc = mapTMWtoTM(font, ord)) != null) {
System.out.println(dc.getCharNum() + " "
+ (dc.getFontNum()-1) + " "
+ font + " "
+ ord);
}
}
}
System.out.println("TMtoTMW: (use sort -g -k 3 -k 4): ");
for (font = 0; font < 5; font++) {
for (ord = 32; ord < 255; ord++) {
DuffCode dc;
if ((dc = mapTMtoTMW(font, ord)) != null) {
System.out.println(ord + " " + font + " "
+ (dc.getFontNum()-1) + " "
+ dc.getCharNum());
}
}
}
}
private static DuffCode getTMtoTMW(int font, int code) { private static DuffCode getTMtoTMW(int font, int code) {
if (false) { // DLC FIXME: why was this here?
if (code > 255-32) { if (code > 255-32) {
switch (code) { switch (code) {
case 8218-32: //sby case 8218-32: //sby
@ -842,6 +970,7 @@ private static DuffCode getTMtoTMW(int font, int code) {
return null; return null;
} }
} }
}
return TMtoTMW[font][code]; return TMtoTMW[font][code];
} }
@ -947,7 +1076,7 @@ public static String getWylieForGlyph(DuffCode dc) {
// This error message is documented in // This error message is documented in
// www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change them both // www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change them both
// when you change this. // when you change this.
return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + dc + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + dc.toString(true) + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
} }
return wylieForGlyph(hashKey); return wylieForGlyph(hashKey);
} }

View file

@ -22,6 +22,7 @@ a,i,u,e,o,I,U,ai,au,A,-i,-I
<?Other?> <?Other?>
_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@# _, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#
// FIXME: add these etc.: M^,<,>,{,},[,],?,~
<?Input:Punctuation?> <?Input:Punctuation?>
//_~32,1~0,32 //_~32,1~0,32
@ -42,6 +43,7 @@ $~38,5~~9,41~~~~~~~0F06
H~239,1~~8,92~~~~~~~0F7F H~239,1~~8,92~~~~~~~0F7F
M~~~8,91~~~~~~~0F7E M~~~8,91~~~~~~~0F7E
`~241,1~~8,94~~~~~~~0F83 `~241,1~~8,94~~~~~~~0F83
// I thought EWTS said 0F83 was M^, not `
&~177,4~~8,93~~~~~~~0F85 &~177,4~~8,93~~~~~~~0F85
@#~201,1~~9,40 @#~201,1~~9,40
@ -688,25 +690,26 @@ _~32,1~~1,32
~45,1~~1,45~~~~~~~0F0B ~45,1~~1,45~~~~~~~0F0B
_~32,1~~2,32 _~32,1~~2,32
~45,1~~2,45~~~~~~~0F0B ~45,1~~2,45~~~~~~~0F0B
_~32,1~~3,32 _~32,2~~3,32
~45,1~~3,45~~~~~~~0F0B ~45,2~~3,45~~~~~~~0F0B
_~32,1~~4,32 _~32,2~~4,32
~45,1~~4,45~~~~~~~0F0B ~45,2~~4,45~~~~~~~0F0B
_~32,1~~5,32 _~32,3~~5,32
~45,1~~5,45~~~~~~~0F0B ~45,3~~5,45~~~~~~~0F0B
_~32,1~~6,32 _~32,3~~6,32
~45,1~~6,45~~~~~~~0F0B ~45,3~~6,45~~~~~~~0F0B
_~32,1~~7,32 _~32,4~~7,32
~45,1~~7,45~~~~~~~0F0B ~45,4~~7,45~~~~~~~0F0B
_~32,1~~8,32 _~32,4~~8,32
~45,1~~8,45~~~~~~~0F0B ~45,4~~8,45~~~~~~~0F0B
_~32,1~~9,32 _~32,5~~9,32
~45,1~~9,45~~~~~~~0F0B ~45,5~~9,45~~~~~~~0F0B
_~32,1~~10,32 _~32,5~~10,32
~45,1~~10,45~~~~~~~0F0B ~45,5~~10,45~~~~~~~0F0B
//bindus //bindus
`~241,1~~8,94~~~~~~~0F83 `~241,1~~8,94~~~~~~~0F83
// I thought EWTS said 0F83 was M^, not `
iM~243,1~~8,96 iM~243,1~~8,96
iM~244,1~~8,97 iM~244,1~~8,97
-iM~245,1~~8,98 -iM~245,1~~8,98
@ -921,12 +924,12 @@ vhite and black pebble~119,5~~9,119~~~~~~~0F1F
triple vhite pebble~120,5~~9,120~~~~~~~0F1C triple vhite pebble~120,5~~9,120~~~~~~~0F1C
triple black pebble~121,5~~9,121~~~~~~~0FCF triple black pebble~121,5~~9,121~~~~~~~0FCF
122,5~~9,122 ~122,5~~9,122
123,5~~9,123 ~123,5~~9,123
124,5~~9,124 ~124,5~~9,124
125,5~~9,125 ~125,5~~9,125
126,5~~9,126 ~126,5~~9,126
128,5~~10,33 ~128,5~~10,33
logo sign chad.rtags~129,5~~10,34~~~~~~~0F15 logo sign chad.rtags~129,5~~10,34~~~~~~~0F15
logo sign lhag.rtags~130,5~~10,35~~~~~~~0F16 logo sign lhag.rtags~130,5~~10,35~~~~~~~0F16
@ -994,7 +997,7 @@ zhu.yig.mgo.rgyan~33,5~~9,33~~~~~~~0F0A
bka'.shog.mgo.rgyan~34,5~~9,34 bka'.shog.mgo.rgyan~34,5~~9,34
mnyam.yig.mgo.rgyan~35,5~~9,35 mnyam.yig.mgo.rgyan~35,5~~9,35
mnyam.yig.mgo.rgyan~36,5~~9,36~~~~~~~0F09 mnyam.yig.mgo.rgyan~36,5~~9,36~~~~~~~0F09
37,5~~9,37 ~37,5~~9,37
zla tse gcig~210,1~~9,38~~~~~~~0F04 zla tse gcig~210,1~~9,38~~~~~~~0F04
half zla tse gcig~200,1~~9,39~~~~~~~0F05 half zla tse gcig~200,1~~9,39~~~~~~~0F05
// zla tse gnyis~201,1~~9,40 is now punctuation. // zla tse gnyis~201,1~~9,40 is now punctuation.