From 229536884f70d0256cc93625ad57b9dc03bc4a1c Mon Sep 17 00:00:00 2001 From: dchandler Date: Mon, 30 Jun 2003 02:24:11 +0000 Subject: [PATCH] I've validated by hand the TM<->TMW mappings. A few things changed, so no previous TM->TMW or TMW->TM conversions can be trusted. --- source/org/thdl/tib/text/TibetanDocument.java | 95 +++-- .../org/thdl/tib/text/TibetanMachineWeb.java | 13 +- source/org/thdl/tib/text/tibwn.ini | 367 +++++++++--------- 3 files changed, 263 insertions(+), 212 deletions(-) diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java index 8ccf5c8..224ad0a 100644 --- a/source/org/thdl/tib/text/TibetanDocument.java +++ b/source/org/thdl/tib/text/TibetanDocument.java @@ -581,30 +581,6 @@ public class TibetanDocument extends DefaultStyledDocument { numAttemptedReplacements); } - /** For debugging only. Start with an empty document, and call - this on it. You'll get all the TibetanMachine glyphs - inserted, in order, into your document. */ - private void insertAllTMGlyphs() { - int font; - int ord; - DuffData[] equivalent = new DuffData[1]; - equivalent[0] = new DuffData(); - - int count = 0; - for (font = 0; font < 5; font++) { - for (ord = 32; ord < 255; ord++) { - if (TibetanMachineWeb.mapTMtoTMW(font, ord, 0) != null) { - equivalent[0].setData((char)ord, font + 1); - try { - insertDuff(tibetanFontSize, count++, equivalent, false); - } catch (NullPointerException e) { - System.err.println("nullpointerexception happened: font is " + font + " ord is " + ord); - } - } - } - } - } - /** This setting determines whether the formatting is preserved, but with infinite loops in it, or is not preserved, but works well. Inserting + removing must be used rather than replacing @@ -781,7 +757,6 @@ public class TibetanDocument extends DefaultStyledDocument { You'll see it coming (TM->TMW) and going (if you do TMW->TM again). I wonder if finalEndPos isn't one shy of where you'd think it would be. 
FIXME */ - ThdlDebug.noteIffyCode(); } return ceh.errorReturn; } @@ -1130,6 +1105,76 @@ public class TibetanDocument extends DefaultStyledDocument { return (Element[])v.toArray(arrayType); } + /** For debugging only. Start with an empty document, and call + this on it. You'll get all the TibetanMachine glyphs + inserted, in order, into your document. */ + private void insertAllTMGlyphs() { + int font; + int ord; + DuffData[] equivalent = new DuffData[1]; + equivalent[0] = new DuffData(); + + int count = 0; + for (font = 0; font < 5; font++) { + for (ord = 32; ord < 255; ord++) { + if (TibetanMachineWeb.mapTMtoTMW(font, ord, 0) != null) { + equivalent[0].setData((char)ord, font + 1); + try { + insertDuff(tibetanFontSize, count++, equivalent, false); + } catch (NullPointerException e) { + System.err.println("nullpointerexception happened: font is " + font + " ord is " + ord); + } + } + } + } + } + + /** I used this to create a document that helped me validate the + TM->TMW conversion. */ + private void insertAllTMGlyphs2(MutableAttributeSet roman) { + int font; + int ord; + DuffData[] equivalent = new DuffData[1]; + equivalent[0] = new DuffData(); + DuffData[] tmwEquivalent = new DuffData[1]; + tmwEquivalent[0] = new DuffData(); + DuffData[] achen = new DuffData[1]; + achen[0] = new DuffData(); + achen[0].setData((char)62, 1); + DuffData[] newline = new DuffData[1]; + newline[0] = new DuffData(); + newline[0].setData((char)10, 1); + DuffData[] space = new DuffData[1]; + space[0] = new DuffData(); + space[0].setData((char)32, 1); + + int count = 0; + for (font = 0; font < 5; font++) { + for (ord = 32; ord < 255; ord++) { + DuffCode tmw; + if ((tmw = TibetanMachineWeb.mapTMtoTMW(font, ord, 0)) != null) { + equivalent[0].setData((char)ord, font + 1); + tmwEquivalent[0].setData(tmw.getCharacter(), tmw.getFontNum()); + try { + insertDuff(72, count++, achen, false); + insertDuff(72, count++, equivalent, false); + insertDuff(72, count++, achen, false); + insertDuff(72, 
count++, tmwEquivalent, true); + + } catch (NullPointerException e) { + System.err.println("nullpointerexception happened: font is " + font + " ord is " + ord); + } + try { + String s = " font " + (font+1) + "; ord " + ord + "\n"; + insertString(count, s, roman); + count += s.length(); + } catch (BadLocationException e) { + throw new Error("badness"); + } + } + } + } + } } /** A helper class used by TibetanDocument.convertHelper(..). */ diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index f705984..0659571 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -1043,6 +1043,12 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) { return null; } } + // We map TibetanMachineSkt1.45, TibetanMachineSkt2.45, + // TibetanMachineSkt3.45, and TibetanMachineSkt4.45 to + // TibetanMachineWeb*.45, even though they're actually just + // garbage, since TibetanMachine.45 is the only tsheg in TM. We + // assume that a machine goofed along the way. (FIXME: optionally + // warn.) if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) { return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap } @@ -1096,7 +1102,12 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) { return null; } } - if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) { + if (45 == ordinal) { + // TibetanMachine.45 is a tsheg, but TibetanMachineSkt2.45 + // etc. are not tshegs.
+ return new DuffCode(1, (char)ordinal); + } + if ((0 != suggestedFont) && (32 == ordinal)) { return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap } DuffCode ans = TMWtoTM[font][ordinal-32]; diff --git a/source/org/thdl/tib/text/tibwn.ini b/source/org/thdl/tib/text/tibwn.ini index bf8cb49..9b2160e 100644 --- a/source/org/thdl/tib/text/tibwn.ini +++ b/source/org/thdl/tib/text/tibwn.ini @@ -495,8 +495,8 @@ n+n+y~123,3~~6,34~1,109~6,122~1,123~1,125~6,108~6,115~f53,fa3,f61 n+p~124,3~~6,35~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa4 n+p+r~125,3~~6,36~1,109~6,121~1,123~1,125~6,107~6,114~f53,fa4,fb2 n+ph~126,3~~6,37~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa5 -n+m~253,3~~6,38~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa8 -n+b+h+y~128,3~~6,39~1,109~6,125~1,123~1,125~6,111~6,118~f53,fa6,fb7,fb1 +n+m~253,3~~6,39~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa8 +n+b+h+y~128,3~~6,38~1,109~6,125~1,123~1,125~6,111~6,118~f53,fa6,fb7,fb1 n+ts~129,3~~6,40~1,109~6,120~1,123~1,125~6,106~6,113~f53,fa9 n+y~130,3~~6,41~1,109~6,120~1,123~1,125~6,106~6,113~f53,fb1 n+r~131,3~~6,42~1,109~6,120~1,123~1,125~6,106~6,113~f53,fb2 @@ -737,182 +737,6 @@ a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114~f68,fb2,fb1 8+1/2~78,5~~9,81~~~~~~~0F31 9+1/2~79,5~~9,82~~~~~~~0F32 -// cantillation sign,heavy beat: -\tmw8082~80,5~~9,83~~~~~~~0FC0 -// cantillation sign,light beat: -\tmw8083~81,5~~9,84~~~~~~~0FC1 -// cantillation sign,cang.te-u: -\tmw8084~82,5~~9,85~~~~~~~0FC2 -// cantillation sign sbub.chal: -\tmw8085~83,5~~9,86~~~~~~~0FC3 -// zhi.rol.btags: -\tmw8086~84,5~~9,87~~~~~~~none - -// sher.bu: -\tmw8088~90,5~~9,88~~~~~~~none -// kuruka: -\tmw8090~92,5~~9,90~~~~~~~none -// no name: -\tmw8091~93,5~~9,91~~~~~~~none - -// DLC FIXME: aren't these elsewhere in unicode, though? 534d, e.g.? use it? 
-// yungs.drung (reversed): -\tmw8097~97,5~~9,97~~~~~~~none -// yungs.drung (standard): -\tmw8098~98,5~~9,98~~~~~~~none - -// mchan rtags trailing: -\tmw8099~99,5~~9,99~~~~~~~none -// mchan rtags leading: -\tmw8100~100,5~~9,100~~~~~~~none - -// mtshan.rtags: -\tmw8101~101,5~~9,101~~~~~~~0F37 -// mtshan.rtags zhes.sa: -\tmw8102~102,5~~9,102~~~~~~~0F35 -// che.mgo: -\tmw8103~103,5~~9,103~~~~~~~0F38 -// kuruka: -\tmw8104~104,5~~9,104~~~~~~~0FBE -// Kuruka.mig.lda: -\tmw8105~105,5~~9,105~~~~~~~0FBF -// ornament: -\tmw8106~106,5~~9,106~~~~~~~0F36 -// yang.rtags: -\tmw8107~107,5~~9,107~~~~~~~0F87 -// lci.rtags: -\tmw8108~108,5~~9,108~~~~~~~0F86 -// mchu.can: -\tmw8109~109,5~~9,109~~~~~~~0F89 -// gru.can.rgyings: -\tmw8110~110,5~~9,110~~~~~~~0F8A -// gru.med.gyings: -\tmw8111~111,5~~9,111~~~~~~~0F8B - -// single white pebble: -\tmw8115~115,5~~9,115~~~~~~~0F1A -// single black pebble: -\tmw8116~116,5~~9,116~~~~~~~0F1D -// double white pebble: -\tmw8117~117,5~~9,117~~~~~~~0F1B -// double black pebble: -\tmw8118~118,5~~9,118~~~~~~~0F1E -// white and black pebble: -\tmw8119~119,5~~9,119~~~~~~~0F1F -// triple white pebble: -\tmw8120~120,5~~9,120~~~~~~~0F1C -// triple black pebble: -\tmw8121~121,5~~9,121~~~~~~~0FCF - -\tmw8122~122,5~~9,122~~~~~~~none -\tmw8123~123,5~~9,123~~~~~~~none -\tmw8124~124,5~~9,124~~~~~~~none -\tmw8125~125,5~~9,125~~~~~~~none -\tmw8126~126,5~~9,126~~~~~~~none -\tmw9033~128,5~~10,33~~~~~~~none - -// logo sign chad.rtags: -\tmw9034~129,5~~10,34~~~~~~~0F15 -// logo sign lhag.rtags: -\tmw9035~130,5~~10,35~~~~~~~0F16 -// sgra.gcan.char.rtags: -\tmw9036~131,5~~10,36~~~~~~~0F17 -// khyud.pa: -\tmw9037~132,5~~10,37~~~~~~~0F18 -// sdong.tshugs: -\tmw9038~133,5~~10,38~~~~~~~0F19 - -// utsama ka: -\tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90 -// utsama kha: -\tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91 - -// tza.'phru: -\tmw7067~174,4~~8,67~~~~~~~0F39 -// DLC FIXME: 0F39, yes? 
-// reversed tza.'phru: -\tmw7068~145,5~~8,68~~~~~~~0F39 -// damaru.rtags: -\tmw7072~178,4~~8,72~~~~~~~0F88 -// half a.chen: -\tmw7073~179,4~~8,73~~~~~~~0F01 -// DLC f68,fa0,f80,f72 comes close, but fa0 would be larger. -// ITHI secret sign: -\tmw7074~180,4~~8,74~~~~~~~none -// Terton's mark: -\tmw7075~181,4~~8,75~~~~~~~none -// Terton's mark: -\tmw7076~182,4~~8,76~~~~~~~none -// Terton's mark: -\tmw7077~183,4~~8,77~~~~~~~none -// Note that this is close to 0f11: -// Terton's mark: -\tmw7078~149,5~~8,78~~~~~~~none -// Terma mark: -\tmw7079~184,4~~8,79~~~~~~~none -// Terma mark: -\tmw7080~185,4~~8,80~~~~~~~none -// Terma mark: -\tmw7081~186,4~~8,81~~~~~~~none -// Mark: -\tmw7082~187,4~~8,82~~~~~~~none - -// DLC see chinese unicode: -// Chinese letter: -\tmw9060~155,5~~10,60~~~~~~~none - -// dril.bu: -\tmw9062~190,5~~10,62~~~~~~~0FC4 -// rdo.rje: -\tmw9063~191,5~~10,63~~~~~~~0FC5 -// padma.gdan: -\tmw9064~192,5~~10,64~~~~~~~0FC6 -// rdo.rje.rgya.gram: -\tmw9065~193,5~~10,65~~~~~~~0FC7 -// phur.ba: -\tmw9066~194,5~~10,66~~~~~~~0FC8 -// nor.bu: -\tmw9067~195,5~~10,67~~~~~~~0FC9 -// nor.bu.gnyis.khyil: -\tmw9068~196,5~~10,68~~~~~~~0FCA -// nor.bu.gsum.khyil: -\tmw9069~197,5~~10,69~~~~~~~0FCB -// nor.bu.bzhi.khyil: -\tmw9070~198,5~~10,70~~~~~~~0FCC - -// bindu + datse + thigle: -\tmw7095~242,1~~8,95~~~~~~~none - -// DLC FIXME: 0F7E? Or is there no Unicode that corresponds to this? WE GOT 2 7Es! 
-\tmw7090~238,1~~8,90~~~~~~~0F7E - -// zhu.yig.mgo.rgyan: -\tmw8033~33,5~~9,33~~~~~~~0F0A -// bka'.shog.mgo.rgyan: -\tmw8034~34,5~~9,34~~~~~~~none -// mnyam.yig.mgo.rgyan: -\tmw8035~35,5~~9,35~~~~~~~none -// mnyam.yig.mgo.rgyan: -\tmw8036~36,5~~9,36~~~~~~~0F09 -// nameless sign: -\tmw8037~37,5~~9,37~~~~~~~none - -// shad (hooked) + single tsheg: -\tmw8044~41,5~~9,44~~~~~~~none -// shad + double tsheg: -\tmw8046~42,5~~9,46~~~~~~~none -// sbrul.shad + single ornament: -\tmw8048~44,5~~9,48~~~~~~~none -// sbrul.shad + double ornament: -\tmw8049~46,5~~9,49~~~~~~~none -// sbrul.shad variant form: -\tmw8050~47,5~~9,50~~~~~~~none - -// reversed hu: -\tmw9043~147,5~~10,43~~~~~~~none -// Inverted ha: -\tmw9044~148,5~~10,44~~~~~~~none - // DLC is this for (40 etc.),7c,60,72 ??? or for standalone f60,f72,f7c? // Special combination: @@ -928,21 +752,13 @@ _~32,1~~1,32~~~~~~~0020 _~32,1~~2,32~~~~~~~0020 ~45,1~~2,45~~~~~~~0F0B _~32,2~~3,32~~~~~~~0020 - ~45,2~~3,45~~~~~~~0F0B _~32,2~~4,32~~~~~~~0020 - ~45,2~~4,45~~~~~~~0F0B _~32,3~~5,32~~~~~~~0020 - ~45,3~~5,45~~~~~~~0F0B _~32,3~~6,32~~~~~~~0020 - ~45,3~~6,45~~~~~~~0F0B _~32,4~~7,32~~~~~~~0020 - ~45,4~~7,45~~~~~~~0F0B _~32,4~~8,32~~~~~~~0020 - ~45,4~~8,45~~~~~~~0F0B _~32,5~~9,32~~~~~~~0020 - ~45,5~~9,45~~~~~~~0F0B _~32,5~~10,32~~~~~~~0020 - ~45,5~~10,45~~~~~~~0F0B //bindus // DLC FIXME: bindu is 7e or not? @@ -1138,3 +954,182 @@ y~175,4~~8,70~~~~~~~0FB1 // ra.btags: r~176,4~~8,71~~~~~~~0FB2 +// DLC FIXME: remove the tag and you get TMW->Wylie +// mappings for all these. 
+ +// cantillation sign,heavy beat: +\tmw8082~80,5~~9,83~~~~~~~0FC0 +// cantillation sign,light beat: +\tmw8083~81,5~~9,84~~~~~~~0FC1 +// cantillation sign,cang.te-u: +\tmw8084~82,5~~9,85~~~~~~~0FC2 +// cantillation sign sbub.chal: +\tmw8085~83,5~~9,86~~~~~~~0FC3 +// zhi.rol.btags: +\tmw8086~84,5~~9,87~~~~~~~none + +// sher.bu: +\tmw8088~90,5~~9,88~~~~~~~none +// kuruka: +\tmw8090~92,5~~9,90~~~~~~~none +// no name: +\tmw8091~93,5~~9,91~~~~~~~none + +// DLC FIXME: aren't these elsewhere in unicode, though? 534d, e.g.? use it? +// yungs.drung (reversed): +\tmw8097~97,5~~9,97~~~~~~~none +// yungs.drung (standard): +\tmw8098~98,5~~9,98~~~~~~~none + +// mchan rtags trailing: +\tmw8099~99,5~~9,99~~~~~~~none +// mchan rtags leading: +\tmw8100~100,5~~9,100~~~~~~~none + +// mtshan.rtags: +\tmw8101~101,5~~9,101~~~~~~~0F37 +// mtshan.rtags zhes.sa: +\tmw8102~102,5~~9,102~~~~~~~0F35 +// che.mgo: +\tmw8103~103,5~~9,103~~~~~~~0F38 +// kuruka: +\tmw8104~104,5~~9,104~~~~~~~0FBE +// Kuruka.mig.lda: +\tmw8105~105,5~~9,105~~~~~~~0FBF +// ornament: +\tmw8106~106,5~~9,106~~~~~~~0F36 +// yang.rtags: +\tmw8107~107,5~~9,107~~~~~~~0F87 +// lci.rtags: +\tmw8108~108,5~~9,108~~~~~~~0F86 +// mchu.can: +\tmw8109~109,5~~9,109~~~~~~~0F89 +// gru.can.rgyings: +\tmw8110~110,5~~9,110~~~~~~~0F8A +// gru.med.gyings: +\tmw8111~111,5~~9,111~~~~~~~0F8B + +// single white pebble: +\tmw8115~115,5~~9,115~~~~~~~0F1A +// single black pebble: +\tmw8116~116,5~~9,116~~~~~~~0F1D +// double white pebble: +\tmw8117~117,5~~9,117~~~~~~~0F1B +// double black pebble: +\tmw8118~118,5~~9,118~~~~~~~0F1E +// white and black pebble: +\tmw8119~119,5~~9,119~~~~~~~0F1F +// triple white pebble: +\tmw8120~120,5~~9,120~~~~~~~0F1C +// triple black pebble: +\tmw8121~121,5~~9,121~~~~~~~0FCF + +\tmw8122~122,5~~9,122~~~~~~~none +\tmw8123~123,5~~9,123~~~~~~~none +\tmw8124~124,5~~9,124~~~~~~~none +\tmw8125~125,5~~9,125~~~~~~~none +\tmw8126~126,5~~9,126~~~~~~~none +\tmw9033~128,5~~10,33~~~~~~~none + +// logo sign chad.rtags: 
+\tmw9034~129,5~~10,34~~~~~~~0F15 +// logo sign lhag.rtags: +\tmw9035~130,5~~10,35~~~~~~~0F16 +// sgra.gcan.char.rtags: +\tmw9036~131,5~~10,36~~~~~~~0F17 +// khyud.pa: +\tmw9037~132,5~~10,37~~~~~~~0F18 +// sdong.tshugs: +\tmw9038~133,5~~10,38~~~~~~~0F19 + +// utsama ka: +\tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90 +// utsama kha: +\tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91 + +// tza.'phru: +\tmw7067~174,4~~8,67~~~~~~~0F39 +// DLC FIXME: 0F39, yes? +// reversed tza.'phru: +\tmw7068~145,5~~8,68~~~~~~~0F39 +// damaru.rtags: +\tmw7072~178,4~~8,72~~~~~~~0F88 +// half a.chen: +\tmw7073~179,4~~8,73~~~~~~~0F01 +// DLC f68,fa0,f80,f72 comes close, but fa0 would be larger. +// ITHI secret sign: +\tmw7074~180,4~~8,74~~~~~~~none +// Terton's mark: +\tmw7075~181,4~~8,75~~~~~~~none +// Terton's mark: +\tmw7076~182,4~~8,76~~~~~~~none +// Terton's mark: +\tmw7077~183,4~~8,77~~~~~~~none +// Note that this is close to 0f11: +// Terton's mark: +\tmw7078~149,5~~8,78~~~~~~~none +// Terma mark: +\tmw7079~184,4~~8,79~~~~~~~none +// Terma mark: +\tmw7080~185,4~~8,80~~~~~~~none +// Terma mark: +\tmw7081~186,4~~8,81~~~~~~~none +// Mark: +\tmw7082~187,4~~8,82~~~~~~~none + +// DLC see chinese unicode: +// Chinese letter: +\tmw9060~155,5~~10,60~~~~~~~none + +// dril.bu: +\tmw9062~190,5~~10,62~~~~~~~0FC4 +// rdo.rje: +\tmw9063~191,5~~10,63~~~~~~~0FC5 +// padma.gdan: +\tmw9064~192,5~~10,64~~~~~~~0FC6 +// rdo.rje.rgya.gram: +\tmw9065~193,5~~10,65~~~~~~~0FC7 +// phur.ba: +\tmw9066~194,5~~10,66~~~~~~~0FC8 +// nor.bu: +\tmw9067~195,5~~10,67~~~~~~~0FC9 +// nor.bu.gnyis.khyil: +\tmw9068~196,5~~10,68~~~~~~~0FCA +// nor.bu.gsum.khyil: +\tmw9069~197,5~~10,69~~~~~~~0FCB +// nor.bu.bzhi.khyil: +\tmw9070~198,5~~10,70~~~~~~~0FCC + +// bindu + datse + thigle: +\tmw7095~242,1~~8,95~~~~~~~none + +// DLC FIXME: 0F7E? Or is there no Unicode that corresponds to this? WE GOT 2 7Es! 
+\tmw7090~238,1~~8,90~~~~~~~0F7E + +// zhu.yig.mgo.rgyan: +\tmw8033~33,5~~9,33~~~~~~~0F0A +// bka'.shog.mgo.rgyan: +\tmw8034~34,5~~9,34~~~~~~~none +// mnyam.yig.mgo.rgyan: +\tmw8035~35,5~~9,35~~~~~~~none +// mnyam.yig.mgo.rgyan: +\tmw8036~36,5~~9,36~~~~~~~0F09 +// nameless sign: +\tmw8037~37,5~~9,37~~~~~~~none + +// shad (hooked) + single tsheg: +\tmw8044~41,5~~9,44~~~~~~~none +// shad + double tsheg: +\tmw8046~42,5~~9,46~~~~~~~none +// sbrul.shad + single ornament: +\tmw8048~44,5~~9,48~~~~~~~none +// sbrul.shad + double ornament: +\tmw8049~46,5~~9,49~~~~~~~none +// sbrul.shad variant form: +\tmw8050~47,5~~9,50~~~~~~~none + +// reversed hu: +\tmw9043~147,5~~10,43~~~~~~~none +// Inverted ha: +\tmw9044~148,5~~10,44~~~~~~~none +