Accurate TM->TMW and TMW->TM mappings are now available. I've verified
this extensively and have full confidence that these mappings agree
with Tony Duff's Tibetan! 5.1 documentation (except as described
below).

To get them, I had to disregard Tony Duff's tables for a few glyphs: the
characters with ordinal 32 and 45 (space and hyphen in Roman ASCII,
space and tsheg in Tibetan).  For these glyphs, we must have mappings
from TibetanMachineSkt4.32 to something, etc., and those mappings were
not present.  I've normalized the mappings for these glyphs; the choice is
arbitrary, because the same two glyphs simply appear fifteen times each
(once in each of the five TM fonts and the ten TMW fonts).
This commit is contained in:
dchandler 2003-05-31 20:13:15 +00:00
parent a4bc23a9ab
commit bfacd6c998
3 changed files with 192 additions and 55 deletions

View file

@ -23,18 +23,13 @@ import java.util.StringTokenizer;
import org.thdl.util.ThdlDebug;
/**
* A wrapper for the primitive data types
* that combine to represent a Tibetan glyph in the
* TibetanMachineWeb family of fonts.
* An immutable representation of a Tibetan glyph in the
* TibetanMachineWeb or TibetanMachine families of fonts.
*
* A DuffCode consists of a font number, a character, and
* a character number. A font identification and a character
* (or character number) are sufficient to uniquely identify
* any TibetanMachineWeb glyph.
* A DuffCode consists of a font number, a character, and a character
* number. A font identification and a character are sufficient to
* uniquely identify any TibetanMachineWeb or TibetanMachine glyph.
*
* Note that DuffCodes are sometimes used, internally, to represent
* glyphs in other fonts, e.g. the TibetanMachine font. But mainly
* they represent TibetanMachineWeb glyphs.
* @author Edward Garrett, Tibetan and Himalayan Digital Library
* @version 1.0 */
@ -154,10 +149,20 @@ public final class DuffCode {
}
/**
* @return a string representation of this object
*/
* @return a string representation of this object */
public String toString() {
return "<duffcode font=" + TibetanMachineWeb.tmwFontNames[fontNum]
return "<duffcode font=" + fontNum
+ " charNum=" + charNum + " character="
+ new Character(getCharacter()).toString() + "/>";
}
/**
 * Builds a string representation of this glyph in which the font is
 * reported by name rather than by number.  The name is looked up, by
 * this DuffCode's font number, in either
 * TibetanMachineWeb.tmwFontNames or TibetanMachineWeb.tmFontNames.
 * @param TMW true if this DuffCode represents a TMW glyph, false if
 * it represents a TM glyph
 * @return a string representation of this object */
public String toString(boolean TMW) {
    // Pick the table of font names that matches the font family.
    String head;
    if (TMW) {
        head = "<duffcode font=" + TibetanMachineWeb.tmwFontNames[fontNum];
    } else {
        head = "<duffcode font=" + TibetanMachineWeb.tmFontNames[fontNum];
    }
    return head
        + " charNum=" + charNum
        + " character=" + String.valueOf(getCharacter())
        + "/>";
}

View file

@ -69,7 +69,8 @@ public class TibetanMachineWeb implements THDLWylieConstants {
private static Map tibHash = new HashMap();
private static Map binduMap = new HashMap();
private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used
private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32];
private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM
private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW
private static String fileName = "tibwn.ini";
private static final String DELIMITER = "~";
private static Set top_vowels;
@ -354,15 +355,21 @@ public class TibetanMachineWeb implements THDLWylieConstants {
;
else if (line.equals("")) //empty string
;
else if (!ignore) {
else {
StringTokenizer st = new StringTokenizer(line,DELIMITER,true);
String wylie = new String();
DuffCode[] duffCodes = new DuffCode[11];
String wylie = null;
DuffCode[] duffCodes;
if (ignore) {
duffCodes = new DuffCode[TMW + 1];
} else {
duffCodes = new DuffCode[11];
}
int k = 0;
while (st.hasMoreTokens()) {
while (st.hasMoreTokens()
&& (!ignore || (k <= 3 /* 3 from 'case 3:' */))) {
String val = st.nextToken();
if (val.equals(DELIMITER))
@ -371,7 +378,9 @@ public class TibetanMachineWeb implements THDLWylieConstants {
else if (!val.equals("")) {
switch (k) {
case 0: //wylie key
if (!ignore) {
wylie = val;
}
break;
case 1:
@ -379,11 +388,13 @@ public class TibetanMachineWeb implements THDLWylieConstants {
break;
case 2: //reduced-size character if there is one
if (!ignore) {
duffCodes[REDUCED_C] = new DuffCode(val,true);
}
break;
case 3: //TibetanMachineWeb code
duffCodes[k-1/* TMW */] = new DuffCode(val,true);
duffCodes[TMW] = new DuffCode(val,true);
// TibetanMachineWeb7.91, for
// example, has no TM(win32)
// equivalent (though it has a
@ -391,35 +402,63 @@ public class TibetanMachineWeb implements THDLWylieConstants {
// test for null here:
if (null != duffCodes[TM]) {
TMtoTMW[duffCodes[TM].getFontNum()-1][duffCodes[TM].getCharNum()-32]
= duffCodes[TMW];
= duffCodes[TMW]; // TM->TMW mapping
}
// but no null test is necessary
// here for either the TMW or the
// TM glyph (though the TM glyph
// could well be null):
TMWtoTM[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32]
= duffCodes[TM]; // TMW->TM mapping
break;
// Vowels etc. to use with this glyph:
case 4:
case 5:
case 6:
case 7:
case 8:
case 9:
if (!ignore) {
duffCodes[k-1] = new DuffCode(val,true);
}
break;
case 10: //Unicode: ignore for now
ThdlDebug.verify(val.length() == 4);
try {
int x;
ThdlDebug.verify((x = Integer.parseInt(val, 16)) >= 0x0F00
&& x <= 0x0FFF);
} catch (NumberFormatException e) {
ThdlDebug.verify(false);
}
break;
case 11: //half-height character if there is one
if (!ignore) {
duffCodes[HALF_C] = new DuffCode(val,true);
}
break;
case 12: //special bindu-value if vowel+bindu are one glyph
if (!ignore) {
DuffCode binduCode = new DuffCode(val,true);
binduMap.put(duffCodes[TMW],binduCode);
}
break;
}
}
}
if (hashOn)
tibHash.put(wylie,duffCodes);
if (!ignore) {
if (null == wylie)
throw new Error(fileName
+ " has a line ^"
+ DELIMITER
+ " which means that no Wylie is assigned. That isn't supported.");
if (hashOn) {
tibHash.put(wylie, duffCodes);
}
int font = duffCodes[2].getFontNum();
int code = duffCodes[2].getCharNum()-32;
@ -427,6 +466,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
}
}
}
}
catch (IOException e) {
System.out.println("file Disappeared");
ThdlDebug.noteIffyCode();
@ -811,7 +851,95 @@ public static DuffCode getHalfHeightGlyph(String hashKey) {
return dc[REDUCED_C];
}
/** Looks up the TibetanMachineWeb glyph that corresponds to a
    TibetanMachine glyph.

    <p>Every existing TibetanMachine glyph has a corresponding
    TibetanMachineWeb glyph, so null is never returned for one.  A
    (font, ordinal) pair that does not name an existing
    TibetanMachine glyph has no table entry; the result for it is
    null, but the lookup is checked with ThdlDebug.verify, and an
    index outside the table is not guarded against, so in general
    such a pair may raise a runtime exception instead.

    @param font the TibetanMachine font
    (0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4)
    @param ordinal the character within that font (32-254)
    @return the DuffCode of the corresponding TibetanMachineWeb
    glyph */
public static DuffCode mapTMtoTMW(int font, int ordinal) {
    // The table's second index is the ordinal offset by 32.
    final DuffCode tmwGlyph = TMtoTMW[font][ordinal - 32];

    // comment this out to test via main(..):
    ThdlDebug.verify(tmwGlyph != null);

    return tmwGlyph;
}
/** Looks up the TibetanMachine glyph that corresponds to a
    TibetanMachineWeb glyph.

    <p>Null is returned for an existing TibetanMachineWeb glyph only
    if that glyph is TibetanMachineWeb7.91, because every other
    TibetanMachineWeb glyph has a corresponding TibetanMachine glyph.
    A (font, ordinal) pair other than (7, 91) that does not name an
    existing TibetanMachineWeb glyph has no table entry; the result
    for it is null, but the lookup is checked with ThdlDebug.verify,
    and an index outside the table is not guarded against, so in
    general such a pair may raise a runtime exception instead.

    @param font the TibetanMachineWeb font
    (0=TibetanMachineWeb,1=TibetanMachineWeb1,...)
    @param ordinal the character within that font (32-126)
    @return the DuffCode of the corresponding TibetanMachine glyph,
    or null for TibetanMachineWeb7.91 */
public static DuffCode mapTMWtoTM(int font, int ordinal) {
    // The table's second index is the ordinal offset by 32.
    final DuffCode tmGlyph = TMWtoTM[font][ordinal - 32];

    // The one TMW glyph that legitimately has no TM counterpart.
    final boolean isKnownGap = (font == 7 && ordinal == 91);

    // comment this out to test via main(..):
    ThdlDebug.verify(tmGlyph != null || isKnownGap);

    return tmGlyph;
}
/** Tests the TMW-&gt;TM and TM-&gt;TMW mappings.  First prints, font
    by font, a cumulative count of the non-null mappings in each
    direction; then lists every TMW-&gt;TM mapping as
    "TM-char TM-font TMW-font TMW-char" and every TM-&gt;TMW mapping
    as "TM-char TM-font TMW-font TMW-char", with fonts printed
    zero-based.  All lookups go through mapTMtoTMW and mapTMWtoTM,
    so see the comments in those methods about their verification
    checks before running this.
    @param args ignored */
public static void main(String[] args) {
    // Cumulative tally of TM glyphs that have a TMW counterpart.
    int found = 0;
    for (int tmFont = 0; tmFont < 5; tmFont++) {
        for (int tmOrd = 32; tmOrd < 255; tmOrd++) {
            if (null != mapTMtoTMW(tmFont, tmOrd)) {
                found++;
            }
        }
        System.out.println("Found " + found + " TM->TMW mappings (thus far).");
    }

    // Cumulative tally of TMW glyphs that have a TM counterpart.
    found = 0;
    for (int tmwFont = 0; tmwFont < 10; tmwFont++) {
        for (int tmwOrd = 32; tmwOrd < 127; tmwOrd++) {
            if (null != mapTMWtoTM(tmwFont, tmwOrd)) {
                found++;
            }
        }
        System.out.println("Found " + found + " TMW->TM mappings (thus far).");
    }

    // Dump of the TMW->TM table, one mapping per line.
    System.out.println("TMWtoTM: ");
    for (int tmwFont = 0; tmwFont < 10; tmwFont++) {
        for (int tmwOrd = 32; tmwOrd < 127; tmwOrd++) {
            DuffCode tm = mapTMWtoTM(tmwFont, tmwOrd);
            if (tm == null) {
                continue;
            }
            System.out.println(tm.getCharNum() + " "
                               + (tm.getFontNum()-1) + " "
                               + tmwFont + " "
                               + tmwOrd);
        }
    }

    // Dump of the TM->TMW table, in the same column order.
    System.out.println("TMtoTMW: (use sort -g -k 3 -k 4): ");
    for (int tmFont = 0; tmFont < 5; tmFont++) {
        for (int tmOrd = 32; tmOrd < 255; tmOrd++) {
            DuffCode tmw = mapTMtoTMW(tmFont, tmOrd);
            if (tmw == null) {
                continue;
            }
            System.out.println(tmOrd + " " + tmFont + " "
                               + (tmw.getFontNum()-1) + " "
                               + tmw.getCharNum());
        }
    }
}
private static DuffCode getTMtoTMW(int font, int code) {
if (false) { // DLC FIXME: why was this here?
if (code > 255-32) {
switch (code) {
case 8218-32: //sby
@ -842,6 +970,7 @@ private static DuffCode getTMtoTMW(int font, int code) {
return null;
}
}
}
return TMtoTMW[font][code];
}
@ -947,7 +1076,7 @@ public static String getWylieForGlyph(DuffCode dc) {
// This error message is documented in
// www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change them both
// when you change this.
return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + dc + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + dc.toString(true) + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
}
return wylieForGlyph(hashKey);
}

View file

@ -22,6 +22,7 @@ a,i,u,e,o,I,U,ai,au,A,-i,-I
<?Other?>
_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#
// FIXME: add these etc.: M^,<,>,{,},[,],?,~
<?Input:Punctuation?>
//_~32,1~0,32
@ -42,6 +43,7 @@ $~38,5~~9,41~~~~~~~0F06
H~239,1~~8,92~~~~~~~0F7F
M~~~8,91~~~~~~~0F7E
`~241,1~~8,94~~~~~~~0F83
// I thought EWTS said 0F83 was M^, not `
&~177,4~~8,93~~~~~~~0F85
@#~201,1~~9,40
@ -688,25 +690,26 @@ _~32,1~~1,32
~45,1~~1,45~~~~~~~0F0B
_~32,1~~2,32
~45,1~~2,45~~~~~~~0F0B
_~32,1~~3,32
~45,1~~3,45~~~~~~~0F0B
_~32,1~~4,32
~45,1~~4,45~~~~~~~0F0B
_~32,1~~5,32
~45,1~~5,45~~~~~~~0F0B
_~32,1~~6,32
~45,1~~6,45~~~~~~~0F0B
_~32,1~~7,32
~45,1~~7,45~~~~~~~0F0B
_~32,1~~8,32
~45,1~~8,45~~~~~~~0F0B
_~32,1~~9,32
~45,1~~9,45~~~~~~~0F0B
_~32,1~~10,32
~45,1~~10,45~~~~~~~0F0B
_~32,2~~3,32
~45,2~~3,45~~~~~~~0F0B
_~32,2~~4,32
~45,2~~4,45~~~~~~~0F0B
_~32,3~~5,32
~45,3~~5,45~~~~~~~0F0B
_~32,3~~6,32
~45,3~~6,45~~~~~~~0F0B
_~32,4~~7,32
~45,4~~7,45~~~~~~~0F0B
_~32,4~~8,32
~45,4~~8,45~~~~~~~0F0B
_~32,5~~9,32
~45,5~~9,45~~~~~~~0F0B
_~32,5~~10,32
~45,5~~10,45~~~~~~~0F0B
//bindus
`~241,1~~8,94~~~~~~~0F83
// I thought EWTS said 0F83 was M^, not `
iM~243,1~~8,96
iM~244,1~~8,97
-iM~245,1~~8,98
@ -921,12 +924,12 @@ vhite and black pebble~119,5~~9,119~~~~~~~0F1F
triple vhite pebble~120,5~~9,120~~~~~~~0F1C
triple black pebble~121,5~~9,121~~~~~~~0FCF
122,5~~9,122
123,5~~9,123
124,5~~9,124
125,5~~9,125
126,5~~9,126
128,5~~10,33
~122,5~~9,122
~123,5~~9,123
~124,5~~9,124
~125,5~~9,125
~126,5~~9,126
~128,5~~10,33
logo sign chad.rtags~129,5~~10,34~~~~~~~0F15
logo sign lhag.rtags~130,5~~10,35~~~~~~~0F16
@ -994,7 +997,7 @@ zhu.yig.mgo.rgyan~33,5~~9,33~~~~~~~0F0A
bka'.shog.mgo.rgyan~34,5~~9,34
mnyam.yig.mgo.rgyan~35,5~~9,35
mnyam.yig.mgo.rgyan~36,5~~9,36~~~~~~~0F09
37,5~~9,37
~37,5~~9,37
zla tse gcig~210,1~~9,38~~~~~~~0F04
half zla tse gcig~200,1~~9,39~~~~~~~0F05
// zla tse gnyis~201,1~~9,40 is now punctuation.