The Wylie 'M' used to map to TMW7.91, when it should map to TMW7.90.

I've fixed that.

I've also added a couple of Unicode mappings to give a flavor for how
multi-codepoint mappings will be represented.

TM->TMW conversion takes about 1 second per thousand glyphs on my
PIII-550.
This commit is contained in:
dchandler 2003-06-01 23:05:32 +00:00
parent 54ca37c824
commit 0f724989b5
4 changed files with 184 additions and 94 deletions

View file

@ -327,14 +327,39 @@ public class Jskad extends JPanel implements DocumentListener {
JMenuItem toTMItem = new JMenuItem("Convert TMW to TM"); // DLC FIXME: do it just in the selection? JMenuItem toTMItem = new JMenuItem("Convert TMW to TM"); // DLC FIXME: do it just in the selection?
toTMItem.addActionListener(new ThdlActionListener() { toTMItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
((TibetanDocument)dp.getDocument()).convertToTM(0, -1); // entire document StringBuffer errors = new StringBuffer();
boolean errorReturn
= ((TibetanDocument)dp.getDocument()).convertToTM(0, -1, errors); // entire document
if (errorReturn) {
JOptionPane.showMessageDialog(Jskad.this,
"At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
+ errors.toString(),
"TMW to TM Errors",
JOptionPane.PLAIN_MESSAGE);
} else {
JOptionPane.showMessageDialog(Jskad.this,
"Converting Tibetan Machine Web to Tibetan Machine met with perfect success.",
"Success", JOptionPane.PLAIN_MESSAGE);
}
} }
}); });
JMenuItem toTMWItem = new JMenuItem("Convert TM to TMW"); // DLC FIXME: do it just in the selection? JMenuItem toTMWItem = new JMenuItem("Convert TM to TMW"); // DLC FIXME: do it just in the selection?
toTMWItem.addActionListener(new ThdlActionListener() { toTMWItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
((TibetanDocument)dp.getDocument()).convertToTMW(0, -1); // entire document StringBuffer errors = new StringBuffer();
boolean errorReturn
= ((TibetanDocument)dp.getDocument()).convertToTMW(0, -1, errors); // entire document
if (errorReturn) {
JOptionPane.showMessageDialog(Jskad.this,
"At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
+ errors.toString(),
"TM to TMW Errors", JOptionPane.PLAIN_MESSAGE);
} else {
JOptionPane.showMessageDialog(Jskad.this,
"Converting Tibetan Machine to Tibetan Machine Web met with perfect success.",
"Success", JOptionPane.PLAIN_MESSAGE);
}
} }
}); });
toolsMenu.addSeparator(); toolsMenu.addSeparator();

View file

@ -89,7 +89,8 @@ public class TMW_RTF_TO_THDL_WYLIE {
out.println(" file. Writes the THDL Extended Wylie transliteration of that file [in"); out.println(" file. Writes the THDL Extended Wylie transliteration of that file [in");
out.println(" --to-wylie mode] or the TibetanMachine equivalent of that file [in"); out.println(" --to-wylie mode] or the TibetanMachine equivalent of that file [in");
out.println(" --to-tibetan-machine mode] to standard output after dealing with the curly"); out.println(" --to-tibetan-machine mode] to standard output after dealing with the curly");
out.println(" brace problem. Exit code is zero on success, nonzero otherwise."); out.println(" brace problem. Exit code is zero on success, 42 if some TibetanMachine glyphs");
out.println(" couldn't be understood (though output is still given), nonzero otherwise.");
out.println(""); out.println("");
out.println(" You may find it helpful to use `--find-some-non-tmw' mode before doing a"); out.println(" You may find it helpful to use `--find-some-non-tmw' mode before doing a");
out.println(" conversion so that you have confidence in the conversion's correctness."); out.println(" conversion so that you have confidence in the conversion's correctness.");
@ -126,7 +127,8 @@ public class TMW_RTF_TO_THDL_WYLIE {
} else { // conversion {to Wylie or TM} mode } else { // conversion {to Wylie or TM} mode
// Fix curly braces in the entire document: // Fix curly braces in the entire document:
((TibetanDocument)dp.getDocument()).replaceTahomaCurlyBracesAndBackslashes(0, -1); ((TibetanDocument)dp.getDocument()).replaceTahomaCurlyBracesAndBackslashes(0, -1);
int exitCode = 0;
if (convertToWylieMode) { if (convertToWylieMode) {
ThdlDebug.verify(!convertToTMMode); ThdlDebug.verify(!convertToTMMode);
// Convert to THDL Wylie: // Convert to THDL Wylie:
@ -134,14 +136,14 @@ public class TMW_RTF_TO_THDL_WYLIE {
} else { } else {
ThdlDebug.verify(convertToTMMode); ThdlDebug.verify(convertToTMMode);
// Convert to TibetanMachine: // Convert to TibetanMachine:
((TibetanDocument)dp.getDocument()).convertToTM(0, dp.getDocument().getLength()); if (!((TibetanDocument)dp.getDocument()).convertToTM(0, dp.getDocument().getLength(), null))
exitCode = 42;
} }
// Write to standard output the result: // Write to standard output the result:
((TibetanDocument)dp.getDocument()).writeRTFOutputStream(out); ((TibetanDocument)dp.getDocument()).writeRTFOutputStream(out);
// Exit normally: return exitCode;
return 0;
} }
} catch (ThdlLazyException e) { } catch (ThdlLazyException e) {
out.println("TMW_RTF_TO_THDL_WYLIE has a BUG:"); out.println("TMW_RTF_TO_THDL_WYLIE has a BUG:");

View file

@ -25,6 +25,7 @@ import javax.swing.text.rtf.RTFEditorKit;
import java.io.*; import java.io.*;
import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
/** Represents a character meant to be rendered in a certain font. /** Represents a character meant to be rendered in a certain font.
* @author David Chandler * @author David Chandler
@ -379,6 +380,7 @@ public class TibetanDocument extends DefaultStyledDocument {
break; break;
} }
if (null != toReplaceWith) { if (null != toReplaceWith) {
// SPEED_FIXME: determining font size might be slow
int fontSize = tibetanFontSize; int fontSize = tibetanFontSize;
try { try {
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue(); fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
@ -403,9 +405,14 @@ public class TibetanDocument extends DefaultStyledDocument {
the document. Be sure to set the size for Tibetan as you like the document. Be sure to set the size for Tibetan as you like
it before using this (well, it usually gets it right on its it before using this (well, it usually gets it right on its
own, but just in case). SPEED_FIXME: might be faster to run own, but just in case). SPEED_FIXME: might be faster to run
over the elements, if they are one per font. */ over the elements, if they are one per font.
public void convertToTM(int begin, int end) { @return false on 100% success, true if any exceptional case
convertTMW_TM(begin, end, true); was encountered
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
*/
public boolean convertToTM(int begin, int end, StringBuffer errors) {
return convertTMW_TM(begin, end, true, errors);
} }
/** Converts all TibetanMachine glyphs in the document to /** Converts all TibetanMachine glyphs in the document to
@ -414,24 +421,38 @@ public class TibetanDocument extends DefaultStyledDocument {
the end of the document. Be sure to set the size for Tibetan the end of the document. Be sure to set the size for Tibetan
as you like it before using this (well, it usually gets it as you like it before using this (well, it usually gets it
right on its own, but just in case). SPEED_FIXME: might be right on its own, but just in case). SPEED_FIXME: might be
faster to run over the elements, if they are one per font. */ faster to run over the elements, if they are one per font.
public void convertToTMW(int begin, int end) { @return false on 100% success, true if any exceptional case
convertTMW_TM(begin, end, false); was encountered
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
*/
public boolean convertToTMW(int begin, int end, StringBuffer errors) {
return convertTMW_TM(begin, end, false, errors);
} }
/** Helper function. /** Helper function.
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
@return false on 100% success, true if any exceptional case
was encountered
@see convertToTMW(int,int) @see convertToTMW(int,int)
@see convertToTM(int,int) */ @see convertToTM(int,int) */
private void convertTMW_TM(int begin, int end, boolean toTM) { private boolean convertTMW_TM(int begin, int end, boolean toTM,
StringBuffer errors) {
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
boolean errorReturn = false;
if (end < 0) if (end < 0)
end = getLength(); end = getLength();
if (begin >= end) if (begin >= end)
return; return errorReturn; // nothing to do, so no errors in the doing.
int i = begin; int i = begin;
HashMap problemGlyphsTable = new HashMap();
try { try {
Position endPos = createPosition(end);
DuffData[] equivalent = new DuffData[1]; DuffData[] equivalent = new DuffData[1];
equivalent[0] = new DuffData(); equivalent[0] = new DuffData();
while (i < end) { while (i < endPos.getOffset()) {
AttributeSet attr = getCharacterElement(i).getAttributes(); AttributeSet attr = getCharacterElement(i).getAttributes();
String fontName = StyleConstants.getFontFamily(attr); String fontName = StyleConstants.getFontFamily(attr);
int fontNum int fontNum
@ -441,50 +462,65 @@ public class TibetanDocument extends DefaultStyledDocument {
if (0 != fontNum) { if (0 != fontNum) {
DuffCode dc = null; DuffCode dc = null;
try { if (toTM) {
if (toTM) { dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1, getText(i,1).charAt(0));
getText(i,1).charAt(0)); } else {
} else { dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1, getText(i,1).charAt(0));
getText(i,1).charAt(0));
}
if (null != dc) {
equivalent[0].setData(dc.getCharacter(),
dc.getFontNum());
}
} catch (ArrayIndexOutOfBoundsException e) {
// we handle this below...
System.out.println("FIXME: "
+ (toTM ? "TMW->TM" : "TM->TMW")
+ " conversion is in trouble");
System.out.println("font is " + (fontNum - 1)
+ ", char is "
+ (int)getText(i,1).charAt(0)
+ "; pos is " + i);
ThdlDebug.noteIffyCode();
} }
if (null != dc) { if (null != dc) {
equivalent[0].setData(dc.getCharacter(),
dc.getFontNum());
// SPEED_FIXME: determining font size might be slow
int fontSize = tibetanFontSize; int fontSize = tibetanFontSize;
try { try {
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue(); fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
} catch (Exception e) { } catch (Exception e) {
// leave it as tibetanFontSize // leave it as tibetanFontSize
} }
insertDuff(fontSize, i, equivalent, !toTM); // We have two choices: remove-then-insert
remove(i+1, 1); // second vs. insert-then-remove and also
// insert-before vs. insert-after. It turns
// out that insert-after preserves formatting
// whereas insert-before doesn't. And we do
// insert-then-remove because we're guessing
// that helps with formatting too.
insertDuff(fontSize, i+1, equivalent, !toTM);
remove(i, 1);
} else { } else {
// DLC FIXME: insert into document a string // DLC FIXME: insert into document a string
// saying "there's no TM equivalent for this." // saying "<<[[there's no TM equivalent for
// (For now, I'm inserting the alphabet and // this, details are ...]]>>" (For now, I'm
// all the numbers in a big font in TMW to try // inserting the alphabet in a big font in TMW
// and get some attention. And I've // to try and get some attention. And I've
// *documented* this on the website.) // *documented* this on the website.)
errorReturn = true;
CharacterInAGivenFont cgf
= new CharacterInAGivenFont(getText(i,1), fontName);
if (!problemGlyphsTable.containsKey(cgf)) {
problemGlyphsTable.put(cgf, "yes this character appears once");
if (null != errors) {
String err
= (toTM ? "TMW->TM" : "TM->TMW")
+ " conversion failed for a glyph:\nFont is "
+ fontName + ", glyph number is "
+ (int)getText(i,1).charAt(0)
+ "; first position found (from zero) is "
+ i + "\n";
errors.append(err);
if (toStdout) {
System.out.print(err);
}
}
}
String trickyTMW String trickyTMW
= "!-\"-#-,-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-0-1-2-3-4-5-6-7-8-9-"; = "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-";
equivalent[0] = new DuffData(trickyTMW, 1); equivalent[0] = new DuffData(trickyTMW, 1);
insertDuff(72, i, equivalent, true); insertDuff(72, i, equivalent, true);
i += trickyTMW.length() + 1; i += trickyTMW.length();
} }
} }
i++; i++;
@ -493,5 +529,6 @@ public class TibetanDocument extends DefaultStyledDocument {
ble.printStackTrace(); ble.printStackTrace();
ThdlDebug.noteIffyCode(); ThdlDebug.noteIffyCode();
} }
return errorReturn;
} }
} }

View file

@ -893,17 +893,16 @@ private static final DuffCode TMW_tab = new DuffCode(1, '\t');
Null is never returned for an existing TibetanMachine glyph, Null is never returned for an existing TibetanMachine glyph,
because every TibetanMachine glyph has a corresponding because every TibetanMachine glyph has a corresponding
TibetanMachineWeb glyph. But if (font, ord) doesn't correspond to TibetanMachineWeb glyph. Null is returned if the input isn't
an existing TibetanMachine glyph, null is returned. In general, valid.
though, this method may raise a runtime exception if you pass in a
(font, ord) that doesn't correspond to an existing TibetanMachine
glyph.
Only a few control characters are supported: '\r' (carriage Only a few control characters are supported: '\r' (carriage
return), '\n' (line feed), and '\t' (tab). return), '\n' (line feed), and '\t' (tab). */
*/ public static DuffCode mapTMtoTMW(int font, int ordinal) {
public static DuffCode mapTMtoTMW(int font, int ordinal) if (font < 0 || font > 4)
throws ArrayIndexOutOfBoundsException { return null;
if (ordinal > 255)
return getUnusualTMtoTMW(font, ordinal);
if (ordinal < 32) { if (ordinal < 32) {
if (ordinal == (int)'\r') if (ordinal == (int)'\r')
return TMW_cr; return TMW_cr;
@ -918,8 +917,6 @@ public static DuffCode mapTMtoTMW(int font, int ordinal)
} }
} }
DuffCode ans = TMtoTMW[font][ordinal-32]; DuffCode ans = TMtoTMW[font][ordinal-32];
// comment this out to test via main(..):
ThdlDebug.verify(null != ans);
return ans; return ans;
} }
@ -934,18 +931,17 @@ private static final DuffCode TM_tab = new DuffCode(1, '\t');
Null is returned for an existing TibetanMachineWeb glyph only if Null is returned for an existing TibetanMachineWeb glyph only if
that glyph is TibetanMachineWeb7.91, because every other that glyph is TibetanMachineWeb7.91, because every other
TibetanMachineWeb glyph has a corresponding TibetanMachine glyph. TibetanMachineWeb glyph has a corresponding TibetanMachine glyph.
But if (font, ord) isn't (7, 91) and doesn't correspond to an Null is returned if the input isn't valid.
existing TibetanMachineWeb glyph, null is returned. In general,
though, this method may raise a runtime exception if you pass in a
(font, ord) that doesn't correspond to an existing
TibetanMachineWeb glyph.
Only a few control characters are supported: '\r' (carriage Only a few control characters are supported: '\r' (carriage
return), '\n' (line feed), and '\t' (tab). return), '\n' (line feed), and '\t' (tab).
*/ */
public static DuffCode mapTMWtoTM(int font, int ordinal) public static DuffCode mapTMWtoTM(int font, int ordinal) {
throws ArrayIndexOutOfBoundsException { if (font < 0 || font > 9)
return null;
if (ordinal > 127)
return null;
if (ordinal < 32) { if (ordinal < 32) {
if (ordinal == (int)'\r') if (ordinal == (int)'\r')
return TM_cr; return TM_cr;
@ -960,8 +956,6 @@ public static DuffCode mapTMWtoTM(int font, int ordinal)
} }
} }
DuffCode ans = TMWtoTM[font][ordinal-32]; DuffCode ans = TMWtoTM[font][ordinal-32];
// comment this out to test via main(..):
ThdlDebug.verify(null != ans || (font == 7 && ordinal == 91));
return ans; return ans;
} }
@ -1015,49 +1009,81 @@ public static void main(String[] args) {
} }
} }
private static DuffCode getTMtoTMW(int font, int code) { /** Tibet Doc makes weird RTF where you see TibetanMachine.8225 etc.
if (false) { // DLC FIXME: why was this here? The highest possible glyph value should be 255, but that's not
if (code > 255-32) { what appears. This returns non-null if (font, code) identify an
switch (code) { oddball we know. This list may well be incomplete, but we handle
case 8218-32: //sby such oddballs in a first-class fashion. */
code = 130-32; private static DuffCode getUnusualTMtoTMW(int font, int code) {
break; if (code > 255) {
if (font == 0) {
switch (code) {
case 347: // reduced-height ha
return TMtoTMW[font][156 - 32];
case 8230-32: //sgr case 353: // d-r-w
code = 133-32; return TMtoTMW[font][154 - 32];
break;
case 8225-32: //spr case 377: // t-w
code = 135-32; return TMtoTMW[font][143 - 32];
break;
case 8117-32: //tshw case 710: // s-b-r
code = 146-32; return TMtoTMW[font][136 - 32];
break;
case 8126-32: //rw case 1026: // s-g-y
code = 149-32; return TMtoTMW[font][128 - 32];
break;
case 8482-32: //grw case 1027: // s-p-y
code = 153-32; return TMtoTMW[font][129 - 32];
break;
case 1106: // d-w
return TMtoTMW[font][144 - 32];
case 8117: // tsh-w
return TMtoTMW[font][146 - 32];
case 8126: // r-w
return TMtoTMW[font][149 - 32];
case 8218: // s-b-y
return TMtoTMW[font][130 - 32];
case 8225: // s-p-r
return TMtoTMW[font][135 - 32];
case 8230: // s-g-r
return TMtoTMW[font][133 - 32];
case 8240: // s-m-r
return TMtoTMW[font][137 - 32];
case 8482: // g-r-w
return TMtoTMW[font][153 - 32];
default: default:
return null; return null;
} }
} } else if (font == 3) {
} switch (code) {
case 402: // h+y
return TMtoTMW[font][131 - 32];
return TMtoTMW[font][code]; default:
return null;
}
} else {
return null;
}
} else {
return null;
}
} }
/** /**
* Gets the TibetanMachine font number for this font name. * Gets the TibetanMachine font number for this font name.
* @param name a font name * @param name a font name
* @return between 1 and 5 if the font is one * @return between 1 and 5 if the font is one
* of the TibetanMachine fonts, otherwise 0 * of the TibetanMachine fonts, otherwise 0 */
*/
public static int getTMFontNumber(String name) { public static int getTMFontNumber(String name) {
String internedName = name.intern(); String internedName = name.intern();
for (int i=1; i<tmFontNames.length; i++) { for (int i=1; i<tmFontNames.length; i++) {