I've validated by hand the TM<->TMW mappings. A few things changed, so

no previous TM->TMW or TMW->TM conversions can be trusted.
This commit is contained in:
dchandler 2003-06-30 02:24:11 +00:00
parent dc03083433
commit 229536884f
3 changed files with 263 additions and 212 deletions

View file

@ -581,30 +581,6 @@ public class TibetanDocument extends DefaultStyledDocument {
numAttemptedReplacements);
}
/** Debugging aid only.  Invoke this on an empty document and it
    will be filled, in order, with every TibetanMachine glyph for
    which TibetanMachineWeb.mapTMtoTMW(..) reports a mapping.  Five
    fonts (indices 0 through 4) and ordinals 32 through 254 are
    tried.  A NullPointerException raised by an insertion is
    reported on System.err and the walk continues. */
private void insertAllTMGlyphs() {
    DuffData glyph = new DuffData();
    DuffData[] glyphHolder = new DuffData[] { glyph };
    int insertAt = 0;
    for (int fontIndex = 0; fontIndex < 5; fontIndex++) {
        for (int code = 32; code < 255; code++) {
            // Skip anything the TM->TMW table has no entry for.
            if (null == TibetanMachineWeb.mapTMtoTMW(fontIndex, code, 0)) {
                continue;
            }
            // fontIndex + 1: presumably DuffData font numbers are
            // one-based -- confirm against DuffData.
            glyph.setData((char)code, fontIndex + 1);
            // The offset advances whether or not the insertion
            // succeeds.
            int offset = insertAt;
            insertAt++;
            try {
                insertDuff(tibetanFontSize, offset, glyphHolder, false);
            } catch (NullPointerException e) {
                System.err.println("nullpointerexception happened: font is " + fontIndex + " ord is " + code);
            }
        }
    }
}
/** This setting determines whether the formatting is preserved,
but with infinite loops in it, or is not preserved, but works
well. Inserting + removing must be used rather than replacing
@ -781,7 +757,6 @@ public class TibetanDocument extends DefaultStyledDocument {
You'll see it coming (TM->TMW) and going (if you do
TMW->TM again). I wonder if finalEndPos isn't one shy
of where you'd think it would be. FIXME */
ThdlDebug.noteIffyCode();
}
return ceh.errorReturn;
}
@ -1130,6 +1105,76 @@ public class TibetanDocument extends DefaultStyledDocument {
return (Element[])v.toArray(arrayType);
}
/** Debugging aid only.  Invoke this on an empty document and it
    will be filled, in order, with every TibetanMachine glyph for
    which TibetanMachineWeb.mapTMtoTMW(..) reports a mapping.  Five
    fonts (indices 0 through 4) and ordinals 32 through 254 are
    tried.  A NullPointerException raised by an insertion is
    reported on System.err and the walk continues. */
private void insertAllTMGlyphs() {
    DuffData glyph = new DuffData();
    DuffData[] glyphHolder = new DuffData[] { glyph };
    int insertAt = 0;
    for (int fontIndex = 0; fontIndex < 5; fontIndex++) {
        for (int code = 32; code < 255; code++) {
            // Skip anything the TM->TMW table has no entry for.
            if (null == TibetanMachineWeb.mapTMtoTMW(fontIndex, code, 0)) {
                continue;
            }
            // fontIndex + 1: presumably DuffData font numbers are
            // one-based -- confirm against DuffData.
            glyph.setData((char)code, fontIndex + 1);
            // The offset advances whether or not the insertion
            // succeeds.
            int offset = insertAt;
            insertAt++;
            try {
                insertDuff(tibetanFontSize, offset, glyphHolder, false);
            } catch (NullPointerException e) {
                System.err.println("nullpointerexception happened: font is " + fontIndex + " ord is " + code);
            }
        }
    }
}
/** I used this to create a document that helped me validate the
    TM->TMW conversion.  For every TibetanMachine (font, ordinal)
    pair that TibetanMachineWeb.mapTMtoTMW(..) maps to something,
    this inserts four glyphs -- a marker glyph, the TM glyph, the
    marker glyph again, and the TMW glyph the mapping returned --
    followed by a line of text naming the font and ordinal.  Fonts
    0 through 4 and ordinals 32 through 254 are tried.
    @param roman the attributes applied to the descriptive text
    that follows each group of glyphs
    @throws Error if the descriptive text cannot be inserted; the
    underlying BadLocationException is attached as the cause */
private void insertAllTMGlyphs2(MutableAttributeSet roman) {
    // Size handed to insertDuff for every glyph in the validation
    // document.
    final int glyphFontSize = 72;

    DuffData[] equivalent = new DuffData[] { new DuffData() };
    DuffData[] tmwEquivalent = new DuffData[] { new DuffData() };
    // Marker placed before the TM glyph and before the TMW glyph.
    // Judging by the name this is meant to be the a-chen glyph
    // (character 62, font number 1) -- confirm against the font.
    DuffData[] achen = new DuffData[] { new DuffData() };
    achen[0].setData((char)62, 1);

    int count = 0;
    for (int font = 0; font < 5; font++) {
        for (int ord = 32; ord < 255; ord++) {
            DuffCode tmw = TibetanMachineWeb.mapTMtoTMW(font, ord, 0);
            if (tmw == null) {
                continue; // no TM->TMW mapping for this glyph
            }
            equivalent[0].setData((char)ord, font + 1);
            tmwEquivalent[0].setData(tmw.getCharacter(), tmw.getFontNum());
            try {
                insertDuff(glyphFontSize, count++, achen, false);
                insertDuff(glyphFontSize, count++, equivalent, false);
                insertDuff(glyphFontSize, count++, achen, false);
                // NOTE(review): only this call passes true as the
                // last argument; presumably that selects the TMW
                // fonts rather than the TM fonts -- confirm against
                // insertDuff.
                insertDuff(glyphFontSize, count++, tmwEquivalent, true);
            } catch (NullPointerException e) {
                // Deliberately best-effort: report and keep going so
                // one bad glyph does not abort the whole document.
                System.err.println("nullpointerexception happened: font is " + font + " ord is " + ord);
            }
            try {
                String s = " font " + (font+1) + "; ord " + ord + "\n";
                insertString(count, s, roman);
                count += s.length();
            } catch (BadLocationException e) {
                // Keep the original exception so the offending
                // offset and stack trace are not lost.
                throw new Error("badness", e);
            }
        }
    }
}
}
/** A helper class used by TibetanDocument.convertHelper(..). */

View file

@ -1043,6 +1043,12 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) {
return null;
}
}
// We map TibetanMachineSkt1.45, TibetanMachineSkt2.45,
// TibetanMachineSkt3.45, and TibetanMachineSkt4.45 to
// TibetanMachineWeb*.45, even though they're actually just
// garbage, since TibetanMachine.45 is the only tsheg in TM. We
// assume that a machine goofed along the way. (FIXME: optionally
// warn.)
if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) {
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
}
@ -1096,7 +1102,12 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) {
return null;
}
}
if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) {
if (45 == ordinal) {
// TibetanMachine.45 is a tsheg, but TibetanMachineSkt2.45
// etc. are not tshegs.
return new DuffCode(1, (char)ordinal);
}
if ((0 != suggestedFont) && (32 == ordinal)) {
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
}
DuffCode ans = TMWtoTM[font][ordinal-32];

View file

@ -495,8 +495,8 @@ n+n+y~123,3~~6,34~1,109~6,122~1,123~1,125~6,108~6,115~f53,fa3,f61
n+p~124,3~~6,35~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa4
n+p+r~125,3~~6,36~1,109~6,121~1,123~1,125~6,107~6,114~f53,fa4,fb2
n+ph~126,3~~6,37~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa5
n+m~253,3~~6,38~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa8
n+b+h+y~128,3~~6,39~1,109~6,125~1,123~1,125~6,111~6,118~f53,fa6,fb7,fb1
n+m~253,3~~6,39~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa8
n+b+h+y~128,3~~6,38~1,109~6,125~1,123~1,125~6,111~6,118~f53,fa6,fb7,fb1
n+ts~129,3~~6,40~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa9
n+y~130,3~~6,41~1,109~6,120~1,123~1,125~6,106~6,113~f53,fb1
n+r~131,3~~6,42~1,109~6,120~1,123~1,125~6,106~6,113~f53,fb2
@ -737,182 +737,6 @@ a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114~f68,fb2,fb1
8+1/2~78,5~~9,81~~~~~~~0F31
9+1/2~79,5~~9,82~~~~~~~0F32
// cantillation sign,heavy beat:
\tmw8082~80,5~~9,83~~~~~~~0FC0
// cantillation sign,light beat:
\tmw8083~81,5~~9,84~~~~~~~0FC1
// cantillation sign,cang.te-u:
\tmw8084~82,5~~9,85~~~~~~~0FC2
// cantillation sign sbub.chal:
\tmw8085~83,5~~9,86~~~~~~~0FC3
// zhi.rol.btags:
\tmw8086~84,5~~9,87~~~~~~~none
// sher.bu:
\tmw8088~90,5~~9,88~~~~~~~none
// kuruka:
\tmw8090~92,5~~9,90~~~~~~~none
// no name:
\tmw8091~93,5~~9,91~~~~~~~none
// DLC FIXME: aren't these elsewhere in unicode, though? 534d, e.g.? use it?
// yungs.drung (reversed):
\tmw8097~97,5~~9,97~~~~~~~none
// yungs.drung (standard):
\tmw8098~98,5~~9,98~~~~~~~none
// mchan rtags trailing:
\tmw8099~99,5~~9,99~~~~~~~none
// mchan rtags leading:
\tmw8100~100,5~~9,100~~~~~~~none
// mtshan.rtags:
\tmw8101~101,5~~9,101~~~~~~~0F37
// mtshan.rtags zhes.sa:
\tmw8102~102,5~~9,102~~~~~~~0F35
// che.mgo:
\tmw8103~103,5~~9,103~~~~~~~0F38
// kuruka:
\tmw8104~104,5~~9,104~~~~~~~0FBE
// Kuruka.mig.lda:
\tmw8105~105,5~~9,105~~~~~~~0FBF
// ornament:
\tmw8106~106,5~~9,106~~~~~~~0F36
// yang.rtags:
\tmw8107~107,5~~9,107~~~~~~~0F87
// lci.rtags:
\tmw8108~108,5~~9,108~~~~~~~0F86
// mchu.can:
\tmw8109~109,5~~9,109~~~~~~~0F89
// gru.can.rgyings:
\tmw8110~110,5~~9,110~~~~~~~0F8A
// gru.med.gyings:
\tmw8111~111,5~~9,111~~~~~~~0F8B
// single white pebble:
\tmw8115~115,5~~9,115~~~~~~~0F1A
// single black pebble:
\tmw8116~116,5~~9,116~~~~~~~0F1D
// double white pebble:
\tmw8117~117,5~~9,117~~~~~~~0F1B
// double black pebble:
\tmw8118~118,5~~9,118~~~~~~~0F1E
// white and black pebble:
\tmw8119~119,5~~9,119~~~~~~~0F1F
// triple white pebble:
\tmw8120~120,5~~9,120~~~~~~~0F1C
// triple black pebble:
\tmw8121~121,5~~9,121~~~~~~~0FCF
\tmw8122~122,5~~9,122~~~~~~~none
\tmw8123~123,5~~9,123~~~~~~~none
\tmw8124~124,5~~9,124~~~~~~~none
\tmw8125~125,5~~9,125~~~~~~~none
\tmw8126~126,5~~9,126~~~~~~~none
\tmw9033~128,5~~10,33~~~~~~~none
// logo sign chad.rtags:
\tmw9034~129,5~~10,34~~~~~~~0F15
// logo sign lhag.rtags:
\tmw9035~130,5~~10,35~~~~~~~0F16
// sgra.gcan.char.rtags:
\tmw9036~131,5~~10,36~~~~~~~0F17
// khyud.pa:
\tmw9037~132,5~~10,37~~~~~~~0F18
// sdong.tshugs:
\tmw9038~133,5~~10,38~~~~~~~0F19
// utsama ka:
\tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
// utsama kha:
\tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91
// tza.'phru:
\tmw7067~174,4~~8,67~~~~~~~0F39
// DLC FIXME: 0F39, yes?
// reversed tza.'phru:
\tmw7068~145,5~~8,68~~~~~~~0F39
// damaru.rtags:
\tmw7072~178,4~~8,72~~~~~~~0F88
// half a.chen:
\tmw7073~179,4~~8,73~~~~~~~0F01
// DLC f68,fa0,f80,f72 comes close, but fa0 would be larger.
// ITHI secret sign:
\tmw7074~180,4~~8,74~~~~~~~none
// Terton's mark:
\tmw7075~181,4~~8,75~~~~~~~none
// Terton's mark:
\tmw7076~182,4~~8,76~~~~~~~none
// Terton's mark:
\tmw7077~183,4~~8,77~~~~~~~none
// Note that this is close to 0f11:
// Terton's mark:
\tmw7078~149,5~~8,78~~~~~~~none
// Terma mark:
\tmw7079~184,4~~8,79~~~~~~~none
// Terma mark:
\tmw7080~185,4~~8,80~~~~~~~none
// Terma mark:
\tmw7081~186,4~~8,81~~~~~~~none
// Mark:
\tmw7082~187,4~~8,82~~~~~~~none
// DLC see chinese unicode:
// Chinese letter:
\tmw9060~155,5~~10,60~~~~~~~none
// dril.bu:
\tmw9062~190,5~~10,62~~~~~~~0FC4
// rdo.rje:
\tmw9063~191,5~~10,63~~~~~~~0FC5
// padma.gdan:
\tmw9064~192,5~~10,64~~~~~~~0FC6
// rdo.rje.rgya.gram:
\tmw9065~193,5~~10,65~~~~~~~0FC7
// phur.ba:
\tmw9066~194,5~~10,66~~~~~~~0FC8
// nor.bu:
\tmw9067~195,5~~10,67~~~~~~~0FC9
// nor.bu.gnyis.khyil:
\tmw9068~196,5~~10,68~~~~~~~0FCA
// nor.bu.gsum.khyil:
\tmw9069~197,5~~10,69~~~~~~~0FCB
// nor.bu.bzhi.khyil:
\tmw9070~198,5~~10,70~~~~~~~0FCC
// bindu + datse + thigle:
\tmw7095~242,1~~8,95~~~~~~~none
// DLC FIXME: 0F7E? Or is there no Unicode that corresponds to this? WE GOT 2 7Es!
\tmw7090~238,1~~8,90~~~~~~~0F7E
// zhu.yig.mgo.rgyan:
\tmw8033~33,5~~9,33~~~~~~~0F0A
// bka'.shog.mgo.rgyan:
\tmw8034~34,5~~9,34~~~~~~~none
// mnyam.yig.mgo.rgyan:
\tmw8035~35,5~~9,35~~~~~~~none
// mnyam.yig.mgo.rgyan:
\tmw8036~36,5~~9,36~~~~~~~0F09
// nameless sign:
\tmw8037~37,5~~9,37~~~~~~~none
// shad (hooked) + single tsheg:
\tmw8044~41,5~~9,44~~~~~~~none
// shad + double tsheg:
\tmw8046~42,5~~9,46~~~~~~~none
// sbrul.shad + single ornament:
\tmw8048~44,5~~9,48~~~~~~~none
// sbrul.shad + double ornament:
\tmw8049~46,5~~9,49~~~~~~~none
// sbrul.shad variant form:
\tmw8050~47,5~~9,50~~~~~~~none
// reversed hu:
\tmw9043~147,5~~10,43~~~~~~~none
// Inverted ha:
\tmw9044~148,5~~10,44~~~~~~~none
<?ToWylie?>
// DLC is this for (40 etc.),7c,60,72 ??? or for standalone f60,f72,f7c?
// Special combination:
@ -928,21 +752,13 @@ _~32,1~~1,32~~~~~~~0020
_~32,1~~2,32~~~~~~~0020
~45,1~~2,45~~~~~~~0F0B
_~32,2~~3,32~~~~~~~0020
~45,2~~3,45~~~~~~~0F0B
_~32,2~~4,32~~~~~~~0020
~45,2~~4,45~~~~~~~0F0B
_~32,3~~5,32~~~~~~~0020
~45,3~~5,45~~~~~~~0F0B
_~32,3~~6,32~~~~~~~0020
~45,3~~6,45~~~~~~~0F0B
_~32,4~~7,32~~~~~~~0020
~45,4~~7,45~~~~~~~0F0B
_~32,4~~8,32~~~~~~~0020
~45,4~~8,45~~~~~~~0F0B
_~32,5~~9,32~~~~~~~0020
~45,5~~9,45~~~~~~~0F0B
_~32,5~~10,32~~~~~~~0020
~45,5~~10,45~~~~~~~0F0B
//bindus
// DLC FIXME: bindu is 7e or not?
@ -1138,3 +954,182 @@ y~175,4~~8,70~~~~~~~0FB1
// ra.btags:
r~176,4~~8,71~~~~~~~0FB2
// DLC FIXME: remove the <?Ignore?> tag and you get TMW->Wylie
// mappings for all these.
<?Ignore?>
// cantillation sign,heavy beat:
\tmw8082~80,5~~9,83~~~~~~~0FC0
// cantillation sign,light beat:
\tmw8083~81,5~~9,84~~~~~~~0FC1
// cantillation sign,cang.te-u:
\tmw8084~82,5~~9,85~~~~~~~0FC2
// cantillation sign sbub.chal:
\tmw8085~83,5~~9,86~~~~~~~0FC3
// zhi.rol.btags:
\tmw8086~84,5~~9,87~~~~~~~none
// sher.bu:
\tmw8088~90,5~~9,88~~~~~~~none
// kuruka:
\tmw8090~92,5~~9,90~~~~~~~none
// no name:
\tmw8091~93,5~~9,91~~~~~~~none
// DLC FIXME: aren't these elsewhere in unicode, though? 534d, e.g.? use it?
// yungs.drung (reversed):
\tmw8097~97,5~~9,97~~~~~~~none
// yungs.drung (standard):
\tmw8098~98,5~~9,98~~~~~~~none
// mchan rtags trailing:
\tmw8099~99,5~~9,99~~~~~~~none
// mchan rtags leading:
\tmw8100~100,5~~9,100~~~~~~~none
// mtshan.rtags:
\tmw8101~101,5~~9,101~~~~~~~0F37
// mtshan.rtags zhes.sa:
\tmw8102~102,5~~9,102~~~~~~~0F35
// che.mgo:
\tmw8103~103,5~~9,103~~~~~~~0F38
// kuruka:
\tmw8104~104,5~~9,104~~~~~~~0FBE
// Kuruka.mig.lda:
\tmw8105~105,5~~9,105~~~~~~~0FBF
// ornament:
\tmw8106~106,5~~9,106~~~~~~~0F36
// yang.rtags:
\tmw8107~107,5~~9,107~~~~~~~0F87
// lci.rtags:
\tmw8108~108,5~~9,108~~~~~~~0F86
// mchu.can:
\tmw8109~109,5~~9,109~~~~~~~0F89
// gru.can.rgyings:
\tmw8110~110,5~~9,110~~~~~~~0F8A
// gru.med.gyings:
\tmw8111~111,5~~9,111~~~~~~~0F8B
// single white pebble:
\tmw8115~115,5~~9,115~~~~~~~0F1A
// single black pebble:
\tmw8116~116,5~~9,116~~~~~~~0F1D
// double white pebble:
\tmw8117~117,5~~9,117~~~~~~~0F1B
// double black pebble:
\tmw8118~118,5~~9,118~~~~~~~0F1E
// white and black pebble:
\tmw8119~119,5~~9,119~~~~~~~0F1F
// triple white pebble:
\tmw8120~120,5~~9,120~~~~~~~0F1C
// triple black pebble:
\tmw8121~121,5~~9,121~~~~~~~0FCF
\tmw8122~122,5~~9,122~~~~~~~none
\tmw8123~123,5~~9,123~~~~~~~none
\tmw8124~124,5~~9,124~~~~~~~none
\tmw8125~125,5~~9,125~~~~~~~none
\tmw8126~126,5~~9,126~~~~~~~none
\tmw9033~128,5~~10,33~~~~~~~none
// logo sign chad.rtags:
\tmw9034~129,5~~10,34~~~~~~~0F15
// logo sign lhag.rtags:
\tmw9035~130,5~~10,35~~~~~~~0F16
// sgra.gcan.char.rtags:
\tmw9036~131,5~~10,36~~~~~~~0F17
// khyud.pa:
\tmw9037~132,5~~10,37~~~~~~~0F18
// sdong.tshugs:
\tmw9038~133,5~~10,38~~~~~~~0F19
// utsama ka:
\tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
// utsama kha:
\tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91
// tza.'phru:
\tmw7067~174,4~~8,67~~~~~~~0F39
// DLC FIXME: 0F39, yes?
// reversed tza.'phru:
\tmw7068~145,5~~8,68~~~~~~~0F39
// damaru.rtags:
\tmw7072~178,4~~8,72~~~~~~~0F88
// half a.chen:
\tmw7073~179,4~~8,73~~~~~~~0F01
// DLC f68,fa0,f80,f72 comes close, but fa0 would be larger.
// ITHI secret sign:
\tmw7074~180,4~~8,74~~~~~~~none
// Terton's mark:
\tmw7075~181,4~~8,75~~~~~~~none
// Terton's mark:
\tmw7076~182,4~~8,76~~~~~~~none
// Terton's mark:
\tmw7077~183,4~~8,77~~~~~~~none
// Note that this is close to 0f11:
// Terton's mark:
\tmw7078~149,5~~8,78~~~~~~~none
// Terma mark:
\tmw7079~184,4~~8,79~~~~~~~none
// Terma mark:
\tmw7080~185,4~~8,80~~~~~~~none
// Terma mark:
\tmw7081~186,4~~8,81~~~~~~~none
// Mark:
\tmw7082~187,4~~8,82~~~~~~~none
// DLC see chinese unicode:
// Chinese letter:
\tmw9060~155,5~~10,60~~~~~~~none
// dril.bu:
\tmw9062~190,5~~10,62~~~~~~~0FC4
// rdo.rje:
\tmw9063~191,5~~10,63~~~~~~~0FC5
// padma.gdan:
\tmw9064~192,5~~10,64~~~~~~~0FC6
// rdo.rje.rgya.gram:
\tmw9065~193,5~~10,65~~~~~~~0FC7
// phur.ba:
\tmw9066~194,5~~10,66~~~~~~~0FC8
// nor.bu:
\tmw9067~195,5~~10,67~~~~~~~0FC9
// nor.bu.gnyis.khyil:
\tmw9068~196,5~~10,68~~~~~~~0FCA
// nor.bu.gsum.khyil:
\tmw9069~197,5~~10,69~~~~~~~0FCB
// nor.bu.bzhi.khyil:
\tmw9070~198,5~~10,70~~~~~~~0FCC
// bindu + datse + thigle:
\tmw7095~242,1~~8,95~~~~~~~none
// DLC FIXME: 0F7E? Or is there no Unicode that corresponds to this? WE GOT 2 7Es!
\tmw7090~238,1~~8,90~~~~~~~0F7E
// zhu.yig.mgo.rgyan:
\tmw8033~33,5~~9,33~~~~~~~0F0A
// bka'.shog.mgo.rgyan:
\tmw8034~34,5~~9,34~~~~~~~none
// mnyam.yig.mgo.rgyan:
\tmw8035~35,5~~9,35~~~~~~~none
// mnyam.yig.mgo.rgyan:
\tmw8036~36,5~~9,36~~~~~~~0F09
// nameless sign:
\tmw8037~37,5~~9,37~~~~~~~none
// shad (hooked) + single tsheg:
\tmw8044~41,5~~9,44~~~~~~~none
// shad + double tsheg:
\tmw8046~42,5~~9,46~~~~~~~none
// sbrul.shad + single ornament:
\tmw8048~44,5~~9,48~~~~~~~none
// sbrul.shad + double ornament:
\tmw8049~46,5~~9,49~~~~~~~none
// sbrul.shad variant form:
\tmw8050~47,5~~9,50~~~~~~~none
// reversed hu:
\tmw9043~147,5~~10,43~~~~~~~none
// Inverted ha:
\tmw9044~148,5~~10,44~~~~~~~none