The Wylie 'M' used to map to TMW7.91, when it should map to TMW7.90.
I've fixed that. I've also added a couple of Unicode mappings to give a flavor of how multi-codepoint mappings will be represented. TM->TMW conversion takes about 1 second per thousand glyphs on my PIII-550.
This commit is contained in:
parent
54ca37c824
commit
0f724989b5
4 changed files with 184 additions and 94 deletions
|
@ -327,14 +327,39 @@ public class Jskad extends JPanel implements DocumentListener {
|
|||
JMenuItem toTMItem = new JMenuItem("Convert TMW to TM"); // DLC FIXME: do it just in the selection?
|
||||
toTMItem.addActionListener(new ThdlActionListener() {
|
||||
public void theRealActionPerformed(ActionEvent e) {
|
||||
((TibetanDocument)dp.getDocument()).convertToTM(0, -1); // entire document
|
||||
StringBuffer errors = new StringBuffer();
|
||||
boolean errorReturn
|
||||
= ((TibetanDocument)dp.getDocument()).convertToTM(0, -1, errors); // entire document
|
||||
if (errorReturn) {
|
||||
JOptionPane.showMessageDialog(Jskad.this,
|
||||
"At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
|
||||
+ errors.toString(),
|
||||
"TMW to TM Errors",
|
||||
JOptionPane.PLAIN_MESSAGE);
|
||||
} else {
|
||||
JOptionPane.showMessageDialog(Jskad.this,
|
||||
"Converting Tibetan Machine Web to Tibetan Machine met with perfect success.",
|
||||
"Success", JOptionPane.PLAIN_MESSAGE);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
JMenuItem toTMWItem = new JMenuItem("Convert TM to TMW"); // DLC FIXME: do it just in the selection?
|
||||
toTMWItem.addActionListener(new ThdlActionListener() {
|
||||
public void theRealActionPerformed(ActionEvent e) {
|
||||
((TibetanDocument)dp.getDocument()).convertToTMW(0, -1); // entire document
|
||||
StringBuffer errors = new StringBuffer();
|
||||
boolean errorReturn
|
||||
= ((TibetanDocument)dp.getDocument()).convertToTMW(0, -1, errors); // entire document
|
||||
if (errorReturn) {
|
||||
JOptionPane.showMessageDialog(Jskad.this,
|
||||
"At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
|
||||
+ errors.toString(),
|
||||
"TM to TMW Errors", JOptionPane.PLAIN_MESSAGE);
|
||||
} else {
|
||||
JOptionPane.showMessageDialog(Jskad.this,
|
||||
"Converting Tibetan Machine to Tibetan Machine Web met with perfect success.",
|
||||
"Success", JOptionPane.PLAIN_MESSAGE);
|
||||
}
|
||||
}
|
||||
});
|
||||
toolsMenu.addSeparator();
|
||||
|
|
|
@ -89,7 +89,8 @@ public class TMW_RTF_TO_THDL_WYLIE {
|
|||
out.println(" file. Writes the THDL Extended Wylie transliteration of that file [in");
|
||||
out.println(" --to-wylie mode] or the TibetanMachine equivalent of that file [in");
|
||||
out.println(" --to-tibetan-machine mode] to standard output after dealing with the curly");
|
||||
out.println(" brace problem. Exit code is zero on success, nonzero otherwise.");
|
||||
out.println(" brace problem. Exit code is zero on success, 42 if some TibetanMachine glyphs");
|
||||
out.println(" couldn't be understood (though output is still given), nonzero otherwise.");
|
||||
out.println("");
|
||||
out.println(" You may find it helpful to use `--find-some-non-tmw' mode before doing a");
|
||||
out.println(" conversion so that you have confidence in the conversion's correctness.");
|
||||
|
@ -127,6 +128,7 @@ public class TMW_RTF_TO_THDL_WYLIE {
|
|||
// Fix curly braces in the entire document:
|
||||
((TibetanDocument)dp.getDocument()).replaceTahomaCurlyBracesAndBackslashes(0, -1);
|
||||
|
||||
int exitCode = 0;
|
||||
if (convertToWylieMode) {
|
||||
ThdlDebug.verify(!convertToTMMode);
|
||||
// Convert to THDL Wylie:
|
||||
|
@ -134,14 +136,14 @@ public class TMW_RTF_TO_THDL_WYLIE {
|
|||
} else {
|
||||
ThdlDebug.verify(convertToTMMode);
|
||||
// Convert to TibetanMachine:
|
||||
((TibetanDocument)dp.getDocument()).convertToTM(0, dp.getDocument().getLength());
|
||||
if (!((TibetanDocument)dp.getDocument()).convertToTM(0, dp.getDocument().getLength(), null))
|
||||
exitCode = 42;
|
||||
}
|
||||
|
||||
// Write to standard output the result:
|
||||
((TibetanDocument)dp.getDocument()).writeRTFOutputStream(out);
|
||||
|
||||
// Exit normally:
|
||||
return 0;
|
||||
return exitCode;
|
||||
}
|
||||
} catch (ThdlLazyException e) {
|
||||
out.println("TMW_RTF_TO_THDL_WYLIE has a BUG:");
|
||||
|
|
|
@ -25,6 +25,7 @@ import javax.swing.text.rtf.RTFEditorKit;
|
|||
import java.io.*;
|
||||
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.util.ThdlOptions;
|
||||
|
||||
/** Represents a character meant to be rendered in a certain font.
|
||||
* @author David Chandler
|
||||
|
@ -379,6 +380,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
break;
|
||||
}
|
||||
if (null != toReplaceWith) {
|
||||
// SPEED_FIXME: determining font size might be slow
|
||||
int fontSize = tibetanFontSize;
|
||||
try {
|
||||
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
|
||||
|
@ -403,9 +405,14 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
the document. Be sure to set the size for Tibetan as you like
|
||||
it before using this (well, it usually gets it right on its
|
||||
own, but just in case). SPEED_FIXME: might be faster to run
|
||||
over the elements, if they are one per font. */
|
||||
public void convertToTM(int begin, int end) {
|
||||
convertTMW_TM(begin, end, true);
|
||||
over the elements, if they are one per font.
|
||||
@return false on 100% success, true if any exceptional case
|
||||
was encountered
|
||||
@param errors if non-null, then notes about all exceptional
|
||||
cases will be appended to this StringBuffer
|
||||
*/
|
||||
public boolean convertToTM(int begin, int end, StringBuffer errors) {
|
||||
return convertTMW_TM(begin, end, true, errors);
|
||||
}
|
||||
|
||||
/** Converts all TibetanMachine glyphs in the document to
|
||||
|
@ -414,24 +421,38 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
the end of the document. Be sure to set the size for Tibetan
|
||||
as you like it before using this (well, it usually gets it
|
||||
right on its own, but just in case). SPEED_FIXME: might be
|
||||
faster to run over the elements, if they are one per font. */
|
||||
public void convertToTMW(int begin, int end) {
|
||||
convertTMW_TM(begin, end, false);
|
||||
faster to run over the elements, if they are one per font.
|
||||
@return false on 100% success, true if any exceptional case
|
||||
was encountered
|
||||
@param errors if non-null, then notes about all exceptional
|
||||
cases will be appended to this StringBuffer
|
||||
*/
|
||||
public boolean convertToTMW(int begin, int end, StringBuffer errors) {
|
||||
return convertTMW_TM(begin, end, false, errors);
|
||||
}
|
||||
|
||||
/** Helper function.
|
||||
@param errors if non-null, then notes about all exceptional
|
||||
cases will be appended to this StringBuffer
|
||||
@return false on 100% success, true if any exceptional case
|
||||
was encountered
|
||||
@see #convertToTMW(int,int,StringBuffer)
|
||||
@see #convertToTM(int,int,StringBuffer) */
|
||||
private void convertTMW_TM(int begin, int end, boolean toTM) {
|
||||
private boolean convertTMW_TM(int begin, int end, boolean toTM,
|
||||
StringBuffer errors) {
|
||||
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
|
||||
boolean errorReturn = false;
|
||||
if (end < 0)
|
||||
end = getLength();
|
||||
if (begin >= end)
|
||||
return;
|
||||
return errorReturn; // nothing to do, so no errors in the doing.
|
||||
int i = begin;
|
||||
HashMap problemGlyphsTable = new HashMap();
|
||||
try {
|
||||
Position endPos = createPosition(end);
|
||||
DuffData[] equivalent = new DuffData[1];
|
||||
equivalent[0] = new DuffData();
|
||||
while (i < end) {
|
||||
while (i < endPos.getOffset()) {
|
||||
AttributeSet attr = getCharacterElement(i).getAttributes();
|
||||
String fontName = StyleConstants.getFontFamily(attr);
|
||||
int fontNum
|
||||
|
@ -441,7 +462,6 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
|
||||
if (0 != fontNum) {
|
||||
DuffCode dc = null;
|
||||
try {
|
||||
if (toTM) {
|
||||
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
|
||||
getText(i,1).charAt(0));
|
||||
|
@ -452,39 +472,55 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
if (null != dc) {
|
||||
equivalent[0].setData(dc.getCharacter(),
|
||||
dc.getFontNum());
|
||||
}
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
// we handle this below...
|
||||
System.out.println("FIXME: "
|
||||
+ (toTM ? "TMW->TM" : "TM->TMW")
|
||||
+ " conversion is in trouble");
|
||||
System.out.println("font is " + (fontNum - 1)
|
||||
+ ", char is "
|
||||
+ (int)getText(i,1).charAt(0)
|
||||
+ "; pos is " + i);
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
if (null != dc) {
|
||||
// SPEED_FIXME: determining font size might be slow
|
||||
int fontSize = tibetanFontSize;
|
||||
try {
|
||||
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
|
||||
} catch (Exception e) {
|
||||
// leave it as tibetanFontSize
|
||||
}
|
||||
insertDuff(fontSize, i, equivalent, !toTM);
|
||||
remove(i+1, 1);
|
||||
// We have two choices: remove-then-insert
|
||||
// second vs. insert-then-remove and also
|
||||
// insert-before vs. insert-after. It turns
|
||||
// out that insert-after preserves formatting
|
||||
// whereas insert-before doesn't. And we do
|
||||
// insert-then-remove because we're guessing
|
||||
// that helps with formatting too.
|
||||
insertDuff(fontSize, i+1, equivalent, !toTM);
|
||||
remove(i, 1);
|
||||
} else {
|
||||
// DLC FIXME: insert into document a string
|
||||
// saying "there's no TM equivalent for this."
|
||||
// (For now, I'm inserting the alphabet and
|
||||
// all the numbers in a big font in TMW to try
|
||||
// and get some attention. And I've
|
||||
// saying "<<[[there's no TM equivalent for
|
||||
// this, details are ...]]>>" (For now, I'm
|
||||
// inserting the alphabet in a big font in TMW
|
||||
// to try and get some attention. And I've
|
||||
// *documented* this on the website.)
|
||||
|
||||
errorReturn = true;
|
||||
CharacterInAGivenFont cgf
|
||||
= new CharacterInAGivenFont(getText(i,1), fontName);
|
||||
if (!problemGlyphsTable.containsKey(cgf)) {
|
||||
problemGlyphsTable.put(cgf, "yes this character appears once");
|
||||
if (null != errors) {
|
||||
String err
|
||||
= (toTM ? "TMW->TM" : "TM->TMW")
|
||||
+ " conversion failed for a glyph:\nFont is "
|
||||
+ fontName + ", glyph number is "
|
||||
+ (int)getText(i,1).charAt(0)
|
||||
+ "; first position found (from zero) is "
|
||||
+ i + "\n";
|
||||
errors.append(err);
|
||||
if (toStdout) {
|
||||
System.out.print(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String trickyTMW
|
||||
= "!-\"-#-,-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-0-1-2-3-4-5-6-7-8-9-";
|
||||
= "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-";
|
||||
equivalent[0] = new DuffData(trickyTMW, 1);
|
||||
insertDuff(72, i, equivalent, true);
|
||||
i += trickyTMW.length() + 1;
|
||||
i += trickyTMW.length();
|
||||
}
|
||||
}
|
||||
i++;
|
||||
|
@ -493,5 +529,6 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
ble.printStackTrace();
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
return errorReturn;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -893,17 +893,16 @@ private static final DuffCode TMW_tab = new DuffCode(1, '\t');
|
|||
|
||||
Null is never returned for an existing TibetanMachine glyph,
|
||||
because every TibetanMachine glyph has a corresponding
|
||||
TibetanMachineWeb glyph. But if (font, ord) doesn't correspond to
|
||||
an existing TibetanMachine glyph, null is returned. In general,
|
||||
though, this method may raise a runtime exception if you pass in a
|
||||
(font, ord) that doesn't correspond to an existing TibetanMachine
|
||||
glyph.
|
||||
TibetanMachineWeb glyph. Null is returned if the input isn't
|
||||
valid.
|
||||
|
||||
Only a few control characters are supported: '\r' (carriage
|
||||
return), '\n' (line feed), and '\t' (tab).
|
||||
*/
|
||||
public static DuffCode mapTMtoTMW(int font, int ordinal)
|
||||
throws ArrayIndexOutOfBoundsException {
|
||||
return), '\n' (line feed), and '\t' (tab). */
|
||||
public static DuffCode mapTMtoTMW(int font, int ordinal) {
|
||||
if (font < 0 || font > 4)
|
||||
return null;
|
||||
if (ordinal > 255)
|
||||
return getUnusualTMtoTMW(font, ordinal);
|
||||
if (ordinal < 32) {
|
||||
if (ordinal == (int)'\r')
|
||||
return TMW_cr;
|
||||
|
@ -918,8 +917,6 @@ public static DuffCode mapTMtoTMW(int font, int ordinal)
|
|||
}
|
||||
}
|
||||
DuffCode ans = TMtoTMW[font][ordinal-32];
|
||||
// comment this out to test via main(..):
|
||||
ThdlDebug.verify(null != ans);
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
@ -934,18 +931,17 @@ private static final DuffCode TM_tab = new DuffCode(1, '\t');
|
|||
Null is returned for an existing TibetanMachineWeb glyph only if
|
||||
that glyph is TibetanMachineWeb7.91, because every other
|
||||
TibetanMachineWeb glyph has a corresponding TibetanMachine glyph.
|
||||
But if (font, ord) isn't (7, 91) and doesn't correspond to an
|
||||
existing TibetanMachineWeb glyph, null is returned. In general,
|
||||
though, this method may raise a runtime exception if you pass in a
|
||||
(font, ord) that doesn't correspond to an existing
|
||||
TibetanMachineWeb glyph.
|
||||
Null is returned if the input isn't valid.
|
||||
|
||||
Only a few control characters are supported: '\r' (carriage
|
||||
return), '\n' (line feed), and '\t' (tab).
|
||||
|
||||
*/
|
||||
public static DuffCode mapTMWtoTM(int font, int ordinal)
|
||||
throws ArrayIndexOutOfBoundsException {
|
||||
public static DuffCode mapTMWtoTM(int font, int ordinal) {
|
||||
if (font < 0 || font > 9)
|
||||
return null;
|
||||
if (ordinal > 127)
|
||||
return null;
|
||||
if (ordinal < 32) {
|
||||
if (ordinal == (int)'\r')
|
||||
return TM_cr;
|
||||
|
@ -960,8 +956,6 @@ public static DuffCode mapTMWtoTM(int font, int ordinal)
|
|||
}
|
||||
}
|
||||
DuffCode ans = TMWtoTM[font][ordinal-32];
|
||||
// comment this out to test via main(..):
|
||||
ThdlDebug.verify(null != ans || (font == 7 && ordinal == 91));
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
@ -1015,49 +1009,81 @@ public static void main(String[] args) {
|
|||
}
|
||||
}
|
||||
|
||||
private static DuffCode getTMtoTMW(int font, int code) {
|
||||
if (false) { // DLC FIXME: why was this here?
|
||||
if (code > 255-32) {
|
||||
/** Tibet Doc makes weird RTF where you see TibetanMachine.8225 etc.
|
||||
The highest possible glyph value should be 255, but that's not
|
||||
what appears. This returns non-null if (font, code) identify an
|
||||
oddball we know. This list may well be incomplete, but we handle
|
||||
such oddballs in a first-class fashion. */
|
||||
private static DuffCode getUnusualTMtoTMW(int font, int code) {
|
||||
if (code > 255) {
|
||||
if (font == 0) {
|
||||
switch (code) {
|
||||
case 8218-32: //sby
|
||||
code = 130-32;
|
||||
break;
|
||||
case 347: // reduced-height ha
|
||||
return TMtoTMW[font][156 - 32];
|
||||
|
||||
case 8230-32: //sgr
|
||||
code = 133-32;
|
||||
break;
|
||||
case 353: // d-r-w
|
||||
return TMtoTMW[font][154 - 32];
|
||||
|
||||
case 8225-32: //spr
|
||||
code = 135-32;
|
||||
break;
|
||||
case 377: // t-w
|
||||
return TMtoTMW[font][143 - 32];
|
||||
|
||||
case 8117-32: //tshw
|
||||
code = 146-32;
|
||||
break;
|
||||
case 710: // s-b-r
|
||||
return TMtoTMW[font][136 - 32];
|
||||
|
||||
case 8126-32: //rw
|
||||
code = 149-32;
|
||||
break;
|
||||
case 1026: // s-g-y
|
||||
return TMtoTMW[font][128 - 32];
|
||||
|
||||
case 8482-32: //grw
|
||||
code = 153-32;
|
||||
break;
|
||||
case 1027: // s-p-y
|
||||
return TMtoTMW[font][129 - 32];
|
||||
|
||||
case 1106: // d-w
|
||||
return TMtoTMW[font][144 - 32];
|
||||
|
||||
case 8117: // tsh-w
|
||||
return TMtoTMW[font][146 - 32];
|
||||
|
||||
case 8126: // r-w
|
||||
return TMtoTMW[font][149 - 32];
|
||||
|
||||
case 8218: // s-b-y
|
||||
return TMtoTMW[font][130 - 32];
|
||||
|
||||
case 8225: // s-p-r
|
||||
return TMtoTMW[font][135 - 32];
|
||||
|
||||
case 8230: // s-g-r
|
||||
return TMtoTMW[font][133 - 32];
|
||||
|
||||
case 8240: // s-m-r
|
||||
return TMtoTMW[font][137 - 32];
|
||||
|
||||
case 8482: // g-r-w
|
||||
return TMtoTMW[font][153 - 32];
|
||||
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (font == 3) {
|
||||
switch (code) {
|
||||
case 402: // h+y
|
||||
return TMtoTMW[font][131 - 32];
|
||||
|
||||
return TMtoTMW[font][code];
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the TibetanMachine font number for this font name.
|
||||
* @param name a font name
|
||||
* @return between 1 and 5 if the font is one
|
||||
* of the TibetanMachine fonts, otherwise 0
|
||||
*/
|
||||
* of the TibetanMachine fonts, otherwise 0 */
|
||||
public static int getTMFontNumber(String name) {
|
||||
String internedName = name.intern();
|
||||
for (int i=1; i<tmFontNames.length; i++) {
|
||||
|
|
Loading…
Reference in a new issue