The Wylie 'M' used to map to TMW7.91, when it should map to TMW7.90.

I've fixed that.

I've also added a couple of Unicode mappings to give a flavor of how
multi-codepoint mappings will be represented.

TM->TMW conversion takes about 1 second per thousand glyphs on my
PIII-550.
This commit is contained in:
dchandler 2003-06-01 23:05:32 +00:00
parent 54ca37c824
commit 0f724989b5
4 changed files with 184 additions and 94 deletions

View file

@ -327,14 +327,39 @@ public class Jskad extends JPanel implements DocumentListener {
JMenuItem toTMItem = new JMenuItem("Convert TMW to TM"); // DLC FIXME: do it just in the selection?
toTMItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
((TibetanDocument)dp.getDocument()).convertToTM(0, -1); // entire document
StringBuffer errors = new StringBuffer();
boolean errorReturn
= ((TibetanDocument)dp.getDocument()).convertToTM(0, -1, errors); // entire document
if (errorReturn) {
JOptionPane.showMessageDialog(Jskad.this,
"At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
+ errors.toString(),
"TMW to TM Errors",
JOptionPane.PLAIN_MESSAGE);
} else {
JOptionPane.showMessageDialog(Jskad.this,
"Converting Tibetan Machine Web to Tibetan Machine met with perfect success.",
"Success", JOptionPane.PLAIN_MESSAGE);
}
}
});
JMenuItem toTMWItem = new JMenuItem("Convert TM to TMW"); // DLC FIXME: do it just in the selection?
toTMWItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
((TibetanDocument)dp.getDocument()).convertToTMW(0, -1); // entire document
StringBuffer errors = new StringBuffer();
boolean errorReturn
= ((TibetanDocument)dp.getDocument()).convertToTMW(0, -1, errors); // entire document
if (errorReturn) {
JOptionPane.showMessageDialog(Jskad.this,
"At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
+ errors.toString(),
"TM to TMW Errors", JOptionPane.PLAIN_MESSAGE);
} else {
JOptionPane.showMessageDialog(Jskad.this,
"Converting Tibetan Machine to Tibetan Machine Web met with perfect success.",
"Success", JOptionPane.PLAIN_MESSAGE);
}
}
});
toolsMenu.addSeparator();

View file

@ -89,7 +89,8 @@ public class TMW_RTF_TO_THDL_WYLIE {
out.println(" file. Writes the THDL Extended Wylie transliteration of that file [in");
out.println(" --to-wylie mode] or the TibetanMachine equivalent of that file [in");
out.println(" --to-tibetan-machine mode] to standard output after dealing with the curly");
out.println(" brace problem. Exit code is zero on success, nonzero otherwise.");
out.println(" brace problem. Exit code is zero on success, 42 if some TibetanMachine glyphs");
out.println(" couldn't be understood (though output is still given), nonzero otherwise.");
out.println("");
out.println(" You may find it helpful to use `--find-some-non-tmw' mode before doing a");
out.println(" conversion so that you have confidence in the conversion's correctness.");
@ -127,6 +128,7 @@ public class TMW_RTF_TO_THDL_WYLIE {
// Fix curly braces in the entire document:
((TibetanDocument)dp.getDocument()).replaceTahomaCurlyBracesAndBackslashes(0, -1);
int exitCode = 0;
if (convertToWylieMode) {
ThdlDebug.verify(!convertToTMMode);
// Convert to THDL Wylie:
@ -134,14 +136,14 @@ public class TMW_RTF_TO_THDL_WYLIE {
} else {
ThdlDebug.verify(convertToTMMode);
// Convert to TibetanMachine:
((TibetanDocument)dp.getDocument()).convertToTM(0, dp.getDocument().getLength());
if (!((TibetanDocument)dp.getDocument()).convertToTM(0, dp.getDocument().getLength(), null))
exitCode = 42;
}
// Write to standard output the result:
((TibetanDocument)dp.getDocument()).writeRTFOutputStream(out);
// Exit normally:
return 0;
return exitCode;
}
} catch (ThdlLazyException e) {
out.println("TMW_RTF_TO_THDL_WYLIE has a BUG:");

View file

@ -25,6 +25,7 @@ import javax.swing.text.rtf.RTFEditorKit;
import java.io.*;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
/** Represents a character meant to be rendered in a certain font.
* @author David Chandler
@ -379,6 +380,7 @@ public class TibetanDocument extends DefaultStyledDocument {
break;
}
if (null != toReplaceWith) {
// SPEED_FIXME: determining font size might be slow
int fontSize = tibetanFontSize;
try {
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
@ -403,9 +405,14 @@ public class TibetanDocument extends DefaultStyledDocument {
the document. Be sure to set the size for Tibetan as you like
it before using this (well, it usually gets it right on its
own, but just in case). SPEED_FIXME: might be faster to run
over the elements, if they are one per font. */
public void convertToTM(int begin, int end) {
convertTMW_TM(begin, end, true);
over the elements, if they are one per font.
@return false on 100% success, true if any exceptional case
was encountered
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
*/
public boolean convertToTM(int begin, int end, StringBuffer errors) {
return convertTMW_TM(begin, end, true, errors);
}
/** Converts all TibetanMachine glyphs in the document to
@ -414,24 +421,38 @@ public class TibetanDocument extends DefaultStyledDocument {
the end of the document. Be sure to set the size for Tibetan
as you like it before using this (well, it usually gets it
right on its own, but just in case). SPEED_FIXME: might be
faster to run over the elements, if they are one per font. */
public void convertToTMW(int begin, int end) {
convertTMW_TM(begin, end, false);
faster to run over the elements, if they are one per font.
@return false on 100% success, true if any exceptional case
was encountered
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
*/
public boolean convertToTMW(int begin, int end, StringBuffer errors) {
return convertTMW_TM(begin, end, false, errors);
}
/** Helper function.
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
@return false on 100% success, true if any exceptional case
was encountered
@see #convertToTMW(int,int,StringBuffer)
@see #convertToTM(int,int,StringBuffer) */
private void convertTMW_TM(int begin, int end, boolean toTM) {
private boolean convertTMW_TM(int begin, int end, boolean toTM,
StringBuffer errors) {
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
boolean errorReturn = false;
if (end < 0)
end = getLength();
if (begin >= end)
return;
return errorReturn; // nothing to do, so no errors in the doing.
int i = begin;
HashMap problemGlyphsTable = new HashMap();
try {
Position endPos = createPosition(end);
DuffData[] equivalent = new DuffData[1];
equivalent[0] = new DuffData();
while (i < end) {
while (i < endPos.getOffset()) {
AttributeSet attr = getCharacterElement(i).getAttributes();
String fontName = StyleConstants.getFontFamily(attr);
int fontNum
@ -441,7 +462,6 @@ public class TibetanDocument extends DefaultStyledDocument {
if (0 != fontNum) {
DuffCode dc = null;
try {
if (toTM) {
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
getText(i,1).charAt(0));
@ -452,39 +472,55 @@ public class TibetanDocument extends DefaultStyledDocument {
if (null != dc) {
equivalent[0].setData(dc.getCharacter(),
dc.getFontNum());
}
} catch (ArrayIndexOutOfBoundsException e) {
// we handle this below...
System.out.println("FIXME: "
+ (toTM ? "TMW->TM" : "TM->TMW")
+ " conversion is in trouble");
System.out.println("font is " + (fontNum - 1)
+ ", char is "
+ (int)getText(i,1).charAt(0)
+ "; pos is " + i);
ThdlDebug.noteIffyCode();
}
if (null != dc) {
// SPEED_FIXME: determining font size might be slow
int fontSize = tibetanFontSize;
try {
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
} catch (Exception e) {
// leave it as tibetanFontSize
}
insertDuff(fontSize, i, equivalent, !toTM);
remove(i+1, 1);
// We have two choices to make: remove-then-insert
// vs. insert-then-remove, and insert-before vs.
// insert-after. It turns out that insert-after
// preserves formatting whereas insert-before
// doesn't. And we do insert-then-remove because
// we're guessing that helps with formatting too.
insertDuff(fontSize, i+1, equivalent, !toTM);
remove(i, 1);
} else {
// DLC FIXME: insert into document a string
// saying "there's no TM equivalent for this."
// (For now, I'm inserting the alphabet and
// all the numbers in a big font in TMW to try
// and get some attention. And I've
// saying "<<[[there's no TM equivalent for
// this, details are ...]]>>" (For now, I'm
// inserting the alphabet in a big font in TMW
// to try and get some attention. And I've
// *documented* this on the website.)
errorReturn = true;
CharacterInAGivenFont cgf
= new CharacterInAGivenFont(getText(i,1), fontName);
if (!problemGlyphsTable.containsKey(cgf)) {
problemGlyphsTable.put(cgf, "yes this character appears once");
if (null != errors) {
String err
= (toTM ? "TMW->TM" : "TM->TMW")
+ " conversion failed for a glyph:\nFont is "
+ fontName + ", glyph number is "
+ (int)getText(i,1).charAt(0)
+ "; first position found (from zero) is "
+ i + "\n";
errors.append(err);
if (toStdout) {
System.out.print(err);
}
}
}
String trickyTMW
= "!-\"-#-,-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-0-1-2-3-4-5-6-7-8-9-";
= "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-";
equivalent[0] = new DuffData(trickyTMW, 1);
insertDuff(72, i, equivalent, true);
i += trickyTMW.length() + 1;
i += trickyTMW.length();
}
}
i++;
@ -493,5 +529,6 @@ public class TibetanDocument extends DefaultStyledDocument {
ble.printStackTrace();
ThdlDebug.noteIffyCode();
}
return errorReturn;
}
}

View file

@ -893,17 +893,16 @@ private static final DuffCode TMW_tab = new DuffCode(1, '\t');
Null is never returned for an existing TibetanMachine glyph,
because every TibetanMachine glyph has a corresponding
TibetanMachineWeb glyph. But if (font, ord) doesn't correspond to
an existing TibetanMachine glyph, null is returned. In general,
though, this method may raise a runtime exception if you pass in a
(font, ord) that doesn't correspond to an existing TibetanMachine
glyph.
TibetanMachineWeb glyph. Null is returned if the input isn't
valid.
Only a few control characters are supported: '\r' (carriage
return), '\n' (line feed), and '\t' (tab).
*/
public static DuffCode mapTMtoTMW(int font, int ordinal)
throws ArrayIndexOutOfBoundsException {
return), '\n' (line feed), and '\t' (tab). */
public static DuffCode mapTMtoTMW(int font, int ordinal) {
if (font < 0 || font > 4)
return null;
if (ordinal > 255)
return getUnusualTMtoTMW(font, ordinal);
if (ordinal < 32) {
if (ordinal == (int)'\r')
return TMW_cr;
@ -918,8 +917,6 @@ public static DuffCode mapTMtoTMW(int font, int ordinal)
}
}
DuffCode ans = TMtoTMW[font][ordinal-32];
// comment this out to test via main(..):
ThdlDebug.verify(null != ans);
return ans;
}
@ -934,18 +931,17 @@ private static final DuffCode TM_tab = new DuffCode(1, '\t');
Null is returned for an existing TibetanMachineWeb glyph only if
that glyph is TibetanMachineWeb7.91, because every other
TibetanMachineWeb glyph has a corresponding TibetanMachine glyph.
But if (font, ord) isn't (7, 91) and doesn't correspond to an
existing TibetanMachineWeb glyph, null is returned. In general,
though, this method may raise a runtime exception if you pass in a
(font, ord) that doesn't correspond to an existing
TibetanMachineWeb glyph.
Null is returned if the input isn't valid.
Only a few control characters are supported: '\r' (carriage
return), '\n' (line feed), and '\t' (tab).
*/
public static DuffCode mapTMWtoTM(int font, int ordinal)
throws ArrayIndexOutOfBoundsException {
public static DuffCode mapTMWtoTM(int font, int ordinal) {
if (font < 0 || font > 9)
return null;
if (ordinal > 127)
return null;
if (ordinal < 32) {
if (ordinal == (int)'\r')
return TM_cr;
@ -960,8 +956,6 @@ public static DuffCode mapTMWtoTM(int font, int ordinal)
}
}
DuffCode ans = TMWtoTM[font][ordinal-32];
// comment this out to test via main(..):
ThdlDebug.verify(null != ans || (font == 7 && ordinal == 91));
return ans;
}
@ -1015,49 +1009,81 @@ public static void main(String[] args) {
}
}
private static DuffCode getTMtoTMW(int font, int code) {
if (false) { // DLC FIXME: why was this here?
if (code > 255-32) {
/** Tibet Doc makes weird RTF where you see TibetanMachine.8225 etc.
The highest possible glyph value should be 255, but that's not
what appears. This returns non-null if (font, code) identify an
oddball we know. This list may well be incomplete, but we handle
such oddballs in a first-class fashion. */
private static DuffCode getUnusualTMtoTMW(int font, int code) {
if (code > 255) {
if (font == 0) {
switch (code) {
case 8218-32: //sby
code = 130-32;
break;
case 347: // reduced-height ha
return TMtoTMW[font][156 - 32];
case 8230-32: //sgr
code = 133-32;
break;
case 353: // d-r-w
return TMtoTMW[font][154 - 32];
case 8225-32: //spr
code = 135-32;
break;
case 377: // t-w
return TMtoTMW[font][143 - 32];
case 8117-32: //tshw
code = 146-32;
break;
case 710: // s-b-r
return TMtoTMW[font][136 - 32];
case 8126-32: //rw
code = 149-32;
break;
case 1026: // s-g-y
return TMtoTMW[font][128 - 32];
case 8482-32: //grw
code = 153-32;
break;
case 1027: // s-p-y
return TMtoTMW[font][129 - 32];
case 1106: // d-w
return TMtoTMW[font][144 - 32];
case 8117: // tsh-w
return TMtoTMW[font][146 - 32];
case 8126: // r-w
return TMtoTMW[font][149 - 32];
case 8218: // s-b-y
return TMtoTMW[font][130 - 32];
case 8225: // s-p-r
return TMtoTMW[font][135 - 32];
case 8230: // s-g-r
return TMtoTMW[font][133 - 32];
case 8240: // s-m-r
return TMtoTMW[font][137 - 32];
case 8482: // g-r-w
return TMtoTMW[font][153 - 32];
default:
return null;
}
}
}
} else if (font == 3) {
switch (code) {
case 402: // h+y
return TMtoTMW[font][131 - 32];
return TMtoTMW[font][code];
default:
return null;
}
} else {
return null;
}
} else {
return null;
}
}
/**
* Gets the TibetanMachine font number for this font name.
* @param name a font name
* @return between 1 and 5 if the font is one
* of the TibetanMachine fonts, otherwise 0
*/
* of the TibetanMachine fonts, otherwise 0 */
public static int getTMFontNumber(String name) {
String internedName = name.intern();
for (int i=1; i<tmFontNames.length; i++) {