The Wylie 'M' used to map to TMW7.91, when it should map to TMW7.90.

I've fixed that.

I've also added a couple of Unicode mappings to give a flavor of how
multi-codepoint mappings will be represented.

TM->TMW conversion takes about 1 second per thousand glyphs on my
PIII-550.
This commit is contained in:
dchandler 2003-06-01 23:05:32 +00:00
parent 54ca37c824
commit 0f724989b5
4 changed files with 184 additions and 94 deletions

View file

@ -25,6 +25,7 @@ import javax.swing.text.rtf.RTFEditorKit;
import java.io.*;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
/** Represents a character meant to be rendered in a certain font.
* @author David Chandler
@ -379,6 +380,7 @@ public class TibetanDocument extends DefaultStyledDocument {
break;
}
if (null != toReplaceWith) {
// SPEED_FIXME: determining font size might be slow
int fontSize = tibetanFontSize;
try {
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
@ -403,9 +405,14 @@ public class TibetanDocument extends DefaultStyledDocument {
the document. Be sure to set the size for Tibetan as you like
it before using this (well, it usually gets it right on its
own, but just in case). SPEED_FIXME: might be faster to run
over the elements, if they are one per font. */
public void convertToTM(int begin, int end) {
convertTMW_TM(begin, end, true);
over the elements, if they are one per font.
@return false on 100% success, true if any exceptional case
was encountered
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
*/
public boolean convertToTM(int begin, int end, StringBuffer errors) {
return convertTMW_TM(begin, end, true, errors);
}
/** Converts all TibetanMachine glyphs in the document to
@ -414,24 +421,38 @@ public class TibetanDocument extends DefaultStyledDocument {
the end of the document. Be sure to set the size for Tibetan
as you like it before using this (well, it usually gets it
right on its own, but just in case). SPEED_FIXME: might be
faster to run over the elements, if they are one per font. */
public void convertToTMW(int begin, int end) {
convertTMW_TM(begin, end, false);
faster to run over the elements, if they are one per font.
@return false on 100% success, true if any exceptional case
was encountered
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
*/
public boolean convertToTMW(int begin, int end, StringBuffer errors) {
return convertTMW_TM(begin, end, false, errors);
}
/** Helper function.
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
@return false on 100% success, true if any exceptional case
was encountered
@see #convertToTMW(int,int,StringBuffer)
@see #convertToTM(int,int,StringBuffer) */
private void convertTMW_TM(int begin, int end, boolean toTM) {
private boolean convertTMW_TM(int begin, int end, boolean toTM,
StringBuffer errors) {
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
boolean errorReturn = false;
if (end < 0)
end = getLength();
if (begin >= end)
return;
return errorReturn; // nothing to do, so no errors in the doing.
int i = begin;
HashMap problemGlyphsTable = new HashMap();
try {
Position endPos = createPosition(end);
DuffData[] equivalent = new DuffData[1];
equivalent[0] = new DuffData();
while (i < end) {
while (i < endPos.getOffset()) {
AttributeSet attr = getCharacterElement(i).getAttributes();
String fontName = StyleConstants.getFontFamily(attr);
int fontNum
@ -441,50 +462,65 @@ public class TibetanDocument extends DefaultStyledDocument {
if (0 != fontNum) {
DuffCode dc = null;
try {
if (toTM) {
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
getText(i,1).charAt(0));
} else {
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
getText(i,1).charAt(0));
}
if (null != dc) {
equivalent[0].setData(dc.getCharacter(),
dc.getFontNum());
}
} catch (ArrayIndexOutOfBoundsException e) {
// we handle this below...
System.out.println("FIXME: "
+ (toTM ? "TMW->TM" : "TM->TMW")
+ " conversion is in trouble");
System.out.println("font is " + (fontNum - 1)
+ ", char is "
+ (int)getText(i,1).charAt(0)
+ "; pos is " + i);
ThdlDebug.noteIffyCode();
if (toTM) {
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
getText(i,1).charAt(0));
} else {
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
getText(i,1).charAt(0));
}
if (null != dc) {
equivalent[0].setData(dc.getCharacter(),
dc.getFontNum());
// SPEED_FIXME: determining font size might be slow
int fontSize = tibetanFontSize;
try {
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
} catch (Exception e) {
// leave it as tibetanFontSize
}
insertDuff(fontSize, i, equivalent, !toTM);
remove(i+1, 1);
// We have two choices: remove-then-insert
// second vs. insert-then-remove and also
// insert-before vs. insert-after. It turns
// out that insert-after preserves formatting
// whereas insert-before doesn't. And we do
// insert-then-remove because we're guessing
// that helps with formatting too.
insertDuff(fontSize, i+1, equivalent, !toTM);
remove(i, 1);
} else {
// DLC FIXME: insert into document a string
// saying "there's no TM equivalent for this."
// (For now, I'm inserting the alphabet and
// all the numbers in a big font in TMW to try
// and get some attention. And I've
// saying "<<[[there's no TM equivalent for
// this, details are ...]]>>" (For now, I'm
// inserting the alphabet in a big font in TMW
// to try and get some attention. And I've
// *documented* this on the website.)
errorReturn = true;
CharacterInAGivenFont cgf
= new CharacterInAGivenFont(getText(i,1), fontName);
if (!problemGlyphsTable.containsKey(cgf)) {
problemGlyphsTable.put(cgf, "yes this character appears once");
if (null != errors) {
String err
= (toTM ? "TMW->TM" : "TM->TMW")
+ " conversion failed for a glyph:\nFont is "
+ fontName + ", glyph number is "
+ (int)getText(i,1).charAt(0)
+ "; first position found (from zero) is "
+ i + "\n";
errors.append(err);
if (toStdout) {
System.out.print(err);
}
}
}
String trickyTMW
= "!-\"-#-,-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-0-1-2-3-4-5-6-7-8-9-";
= "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-";
equivalent[0] = new DuffData(trickyTMW, 1);
insertDuff(72, i, equivalent, true);
i += trickyTMW.length() + 1;
i += trickyTMW.length();
}
}
i++;
@ -493,5 +529,6 @@ public class TibetanDocument extends DefaultStyledDocument {
ble.printStackTrace();
ThdlDebug.noteIffyCode();
}
return errorReturn;
}
}

View file

@ -893,17 +893,16 @@ private static final DuffCode TMW_tab = new DuffCode(1, '\t');
Null is never returned for an existing TibetanMachine glyph,
because every TibetanMachine glyph has a corresponding
TibetanMachineWeb glyph. But if (font, ord) doesn't correspond to
an existing TibetanMachine glyph, null is returned. In general,
though, this method may raise a runtime exception if you pass in a
(font, ord) that doesn't correspond to an existing TibetanMachine
glyph.
TibetanMachineWeb glyph. Null is returned if the input isn't
valid.
Only a few control characters are supported: '\r' (carriage
return), '\n' (line feed), and '\t' (tab).
*/
public static DuffCode mapTMtoTMW(int font, int ordinal)
throws ArrayIndexOutOfBoundsException {
return), '\n' (line feed), and '\t' (tab). */
public static DuffCode mapTMtoTMW(int font, int ordinal) {
if (font < 0 || font > 4)
return null;
if (ordinal > 255)
return getUnusualTMtoTMW(font, ordinal);
if (ordinal < 32) {
if (ordinal == (int)'\r')
return TMW_cr;
@ -918,8 +917,6 @@ public static DuffCode mapTMtoTMW(int font, int ordinal)
}
}
DuffCode ans = TMtoTMW[font][ordinal-32];
// comment this out to test via main(..):
ThdlDebug.verify(null != ans);
return ans;
}
@ -934,18 +931,17 @@ private static final DuffCode TM_tab = new DuffCode(1, '\t');
Null is returned for an existing TibetanMachineWeb glyph only if
that glyph is TibetanMachineWeb7.91, because every other
TibetanMachineWeb glyph has a corresponding TibetanMachine glyph.
But if (font, ord) isn't (7, 91) and doesn't correspond to an
existing TibetanMachineWeb glyph, null is returned. In general,
though, this method may raise a runtime exception if you pass in a
(font, ord) that doesn't correspond to an existing
TibetanMachineWeb glyph.
Null is returned if the input isn't valid.
Only a few control characters are supported: '\r' (carriage
return), '\n' (line feed), and '\t' (tab).
*/
public static DuffCode mapTMWtoTM(int font, int ordinal)
throws ArrayIndexOutOfBoundsException {
public static DuffCode mapTMWtoTM(int font, int ordinal) {
if (font < 0 || font > 9)
return null;
if (ordinal > 127)
return null;
if (ordinal < 32) {
if (ordinal == (int)'\r')
return TM_cr;
@ -960,8 +956,6 @@ public static DuffCode mapTMWtoTM(int font, int ordinal)
}
}
DuffCode ans = TMWtoTM[font][ordinal-32];
// comment this out to test via main(..):
ThdlDebug.verify(null != ans || (font == 7 && ordinal == 91));
return ans;
}
@ -1015,49 +1009,81 @@ public static void main(String[] args) {
}
}
private static DuffCode getTMtoTMW(int font, int code) {
if (false) { // DLC FIXME: why was this here?
if (code > 255-32) {
switch (code) {
case 8218-32: //sby
code = 130-32;
break;
/** Tibet Doc makes weird RTF where you see TibetanMachine.8225 etc.
The highest possible glyph value should be 255, but that's not
what appears. This returns non-null if (font, code) identify an
oddball we know. This list may well be incomplete, but we handle
such oddballs in a first-class fashion. */
private static DuffCode getUnusualTMtoTMW(int font, int code) {
if (code > 255) {
if (font == 0) {
switch (code) {
case 347: // reduced-height ha
return TMtoTMW[font][156 - 32];
case 8230-32: //sgr
code = 133-32;
break;
case 353: // d-r-w
return TMtoTMW[font][154 - 32];
case 8225-32: //spr
code = 135-32;
break;
case 377: // t-w
return TMtoTMW[font][143 - 32];
case 8117-32: //tshw
code = 146-32;
break;
case 710: // s-b-r
return TMtoTMW[font][136 - 32];
case 8126-32: //rw
code = 149-32;
break;
case 1026: // s-g-y
return TMtoTMW[font][128 - 32];
case 8482-32: //grw
code = 153-32;
break;
case 1027: // s-p-y
return TMtoTMW[font][129 - 32];
case 1106: // d-w
return TMtoTMW[font][144 - 32];
case 8117: // tsh-w
return TMtoTMW[font][146 - 32];
case 8126: // r-w
return TMtoTMW[font][149 - 32];
case 8218: // s-b-y
return TMtoTMW[font][130 - 32];
case 8225: // s-p-r
return TMtoTMW[font][135 - 32];
case 8230: // s-g-r
return TMtoTMW[font][133 - 32];
case 8240: // s-m-r
return TMtoTMW[font][137 - 32];
case 8482: // g-r-w
return TMtoTMW[font][153 - 32];
default:
return null;
}
}
}
}
} else if (font == 3) {
switch (code) {
case 402: // h+y
return TMtoTMW[font][131 - 32];
return TMtoTMW[font][code];
default:
return null;
}
} else {
return null;
}
} else {
return null;
}
}
/**
* Gets the TibetanMachine font number for this font name.
* @param name a font name
* @return between 1 and 5 if the font is one
* of the TibetanMachine fonts, otherwise 0
*/
* of the TibetanMachine fonts, otherwise 0 */
public static int getTMFontNumber(String name) {
String internedName = name.intern();
for (int i=1; i<tmFontNames.length; i++) {