TM->TMW and TMW->TM conversion in RTF is now supported. I've
noticed that formatting is mostly OK but sometimes gets bungled slightly. I tried everything I could think of, and now I'm passing the buck to Java's RTF support. TMW_RTF_TO_THDL_WYLIE (now misnamed) supports TMW->TM conversion (but not TM->TMW). There is an automated test case for a TMW->TM conversion. I have full confidence in this conversion. Even the smallest glitch in the core functionality (not formatting) would surprise me. Note that the JUnit test TMW_RTF_TO_THDL_WYLIETest sometimes fails due to one- or two-line diffs between the actual and expected outputs. I'm guessing this is because Java's RTF support is not deterministic; it is not a real failure. I'm too lazy to make a more elaborate sed/diff mechanism that works on all platforms, and that would complicate the build anyway.
This commit is contained in:
parent
56d5ac7210
commit
0235263ddf
8 changed files with 416 additions and 103 deletions
|
@ -19,9 +19,13 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.tib.text;
|
||||
|
||||
/**
|
||||
* A wrapper object for a stretch of TibetanMachineWeb data that shares the same font.
|
||||
* A piece of DuffData consists of a font number and a string.
|
||||
* The font number is a number from one to ten, corresponding
|
||||
* A wrapper object for a stretch of TibetanMachineWeb (TMW) or
|
||||
* TibetanMachine (TM) data that shares the same font. A piece of
|
||||
* DuffData consists of a font number and a string. The fact that this
|
||||
* stretch is TMW vs. TM is not stored in this object; the client must
|
||||
* remember that itself.
|
||||
*
|
||||
* For TMW, the font number is a number from one to ten, corresponding
|
||||
* to the ten TibetanMachineWeb fonts, as follows:
|
||||
* <p>
|
||||
* 1 - TibetanMachineWeb<br>
|
||||
|
@ -29,9 +33,18 @@ package org.thdl.tib.text;
|
|||
* ...<br>
|
||||
* 10 - TibetanMachineWeb9<br>
|
||||
* <p>
|
||||
* The string represents a contiguous stretch of data in that
|
||||
* font, i.e. a stretch of TibetanMachineWeb that doesn't require a font change.
|
||||
*/
|
||||
* For TM, the font number is a number from one to five, corresponding
|
||||
* to the five TibetanMachine fonts, as follows:
|
||||
* <p>
|
||||
* 1 - TibetanMachine<br>
|
||||
* 2 - TibetanMachineSkt1<br>
|
||||
* 3 - TibetanMachineSkt2<br>
|
||||
* 4 - TibetanMachineSkt3<br>
|
||||
* 5 - TibetanMachineSkt4<br>
|
||||
* <p>
|
||||
* The string represents a contiguous stretch of data in that font,
|
||||
* i.e. a stretch of TibetanMachineWeb or TibetanMachine that doesn't
|
||||
* require a font change. */
|
||||
public class DuffData {
|
||||
/**
|
||||
* a string of text
|
||||
|
@ -43,11 +56,24 @@ public class DuffData {
|
|||
public int font;
|
||||
|
||||
/**
|
||||
* @param s a string of TibetanMachineWeb text
|
||||
* @param i a TibetanMachineWeb font number
|
||||
* @param s a string of TibetanMachineWeb or TibetanMachine text
|
||||
* @param i a TibetanMachineWeb or TibetanMachine font number
|
||||
*/
|
||||
public DuffData(String s, int i) {
    // Record the (text, font number) pair exactly as supplied; no
    // validation is done on either value.
    this.text = s;
    this.font = i;
}
|
||||
|
||||
/** Default constructor. The DuffData is invalid after this until
|
||||
you call setData or manipulate the public fields. */
|
||||
public DuffData() { }
|
||||
|
||||
/** Changes the text and font this DuffData represents.
|
||||
* @param c a character of TibetanMachineWeb or TibetanMachine text
|
||||
* @param i a TibetanMachineWeb or TibetanMachine font number
|
||||
*/
|
||||
public void setData(char c, int i) {
|
||||
text = new String(new char[] { c });
|
||||
font = i;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -150,16 +150,18 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
* @param pos the position at which you want to insert text
|
||||
*/
|
||||
public int insertDuff(int pos, DuffData[] glyphs) {
|
||||
return insertDuff(tibetanFontSize, pos, glyphs);
|
||||
return insertDuff(tibetanFontSize, pos, glyphs, true);
|
||||
}
|
||||
|
||||
private int insertDuff(int fontSize, int pos, DuffData[] glyphs) {
|
||||
private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW) {
|
||||
if (glyphs == null)
|
||||
return pos;
|
||||
|
||||
MutableAttributeSet mas;
|
||||
for (int i=0; i<glyphs.length; i++) {
|
||||
mas = TibetanMachineWeb.getAttributeSet(glyphs[i].font);
|
||||
mas = ((asTMW)
|
||||
? TibetanMachineWeb.getAttributeSet(glyphs[i].font)
|
||||
: TibetanMachineWeb.getAttributeSetTM(glyphs[i].font));
|
||||
appendDuff(fontSize, pos, glyphs[i].text, mas);
|
||||
pos += glyphs[i].text.length();
|
||||
}
|
||||
|
@ -350,7 +352,8 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
TibetanMachineWeb. Works within the range [start, end).
|
||||
Using a negative number for end means that this will run to
|
||||
the end of the document. Be sure to set the size for Tibetan
|
||||
as you like it before using this. SPEED_FIXME: might be
|
||||
as you like it before using this (well, it usually gets it
|
||||
right on its own, but just in case). SPEED_FIXME: might be
|
||||
faster to run over the elements, if they are one per font. */
|
||||
public void replaceTahomaCurlyBracesAndBackslashes(int begin, int end) {
|
||||
if (end < 0)
|
||||
|
@ -382,7 +385,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
} catch (Exception e) {
|
||||
// leave it as tibetanFontSize
|
||||
}
|
||||
insertDuff(fontSize, i, toReplaceWith);
|
||||
insertDuff(fontSize, i, toReplaceWith, true);
|
||||
remove(i+1, 1);
|
||||
}
|
||||
}
|
||||
|
@ -393,4 +396,102 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
}
|
||||
|
||||
/** Converts all TibetanMachineWeb glyphs in the document to
|
||||
TibetanMachine. Works within the range [start, end). Using a
|
||||
negative number for end means that this will run to the end of
|
||||
the document. Be sure to set the size for Tibetan as you like
|
||||
it before using this (well, it usually gets it right on its
|
||||
own, but just in case). SPEED_FIXME: might be faster to run
|
||||
over the elements, if they are one per font. */
|
||||
public void convertToTM(int begin, int end) {
|
||||
convertTMW_TM(begin, end, true);
|
||||
}
|
||||
|
||||
/** Converts all TibetanMachine glyphs in the document to
|
||||
TibetanMachineWeb. Works within the range [start, end).
|
||||
Using a negative number for end means that this will run to
|
||||
the end of the document. Be sure to set the size for Tibetan
|
||||
as you like it before using this (well, it usually gets it
|
||||
right on its own, but just in case). SPEED_FIXME: might be
|
||||
faster to run over the elements, if they are one per font. */
|
||||
public void convertToTMW(int begin, int end) {
|
||||
convertTMW_TM(begin, end, false);
|
||||
}
|
||||
|
||||
/** Helper function.
|
||||
@see convertToTMW(int,int)
|
||||
@see convertToTM(int,int) */
|
||||
private void convertTMW_TM(int begin, int end, boolean toTM) {
|
||||
if (end < 0)
|
||||
end = getLength();
|
||||
if (begin >= end)
|
||||
return;
|
||||
int i = begin;
|
||||
try {
|
||||
DuffData[] equivalent = new DuffData[1];
|
||||
equivalent[0] = new DuffData();
|
||||
while (i < end) {
|
||||
AttributeSet attr = getCharacterElement(i).getAttributes();
|
||||
String fontName = StyleConstants.getFontFamily(attr);
|
||||
int fontNum
|
||||
= (toTM
|
||||
? TibetanMachineWeb.getTMWFontNumber(fontName)
|
||||
: TibetanMachineWeb.getTMFontNumber(fontName));
|
||||
|
||||
if (0 != fontNum) {
|
||||
DuffCode dc = null;
|
||||
try {
|
||||
if (toTM) {
|
||||
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
|
||||
getText(i,1).charAt(0));
|
||||
} else {
|
||||
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
|
||||
getText(i,1).charAt(0));
|
||||
}
|
||||
if (null != dc) {
|
||||
equivalent[0].setData(dc.getCharacter(),
|
||||
dc.getFontNum());
|
||||
}
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
// we handle this below...
|
||||
System.out.println("FIXME: "
|
||||
+ (toTM ? "TMW->TM" : "TM->TMW")
|
||||
+ " conversion is in trouble");
|
||||
System.out.println("font is " + (fontNum - 1)
|
||||
+ ", char is "
|
||||
+ (int)getText(i,1).charAt(0)
|
||||
+ "; pos is " + i);
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
if (null != dc) {
|
||||
int fontSize = tibetanFontSize;
|
||||
try {
|
||||
fontSize = ((Integer)getCharacterElement(i).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
|
||||
} catch (Exception e) {
|
||||
// leave it as tibetanFontSize
|
||||
}
|
||||
insertDuff(fontSize, i, equivalent, !toTM);
|
||||
remove(i+1, 1);
|
||||
} else {
|
||||
// DLC FIXME: insert into document a string
|
||||
// saying "there's no TM equivalent for this."
|
||||
// (For now, I'm inserting the alphabet and
|
||||
// all the numbers in a big font in TMW to try
|
||||
// and get some attention. And I've
|
||||
// *documented* this on the website.)
|
||||
String trickyTMW
|
||||
= "!-\"-#-,-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-0-1-2-3-4-5-6-7-8-9-";
|
||||
equivalent[0] = new DuffData(trickyTMW, 1);
|
||||
insertDuff(72, i, equivalent, true);
|
||||
i += trickyTMW.length() + 1;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
} catch (BadLocationException ble) {
|
||||
ble.printStackTrace();
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -74,7 +74,12 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
private static String fileName = "tibwn.ini";
|
||||
private static final String DELIMITER = "~";
|
||||
private static Set top_vowels;
|
||||
/** a way of encoding the choice of TibetanMachineWeb font from
|
||||
that family of 10 fonts: */
|
||||
private static SimpleAttributeSet[] webFontAttributeSet = new SimpleAttributeSet[11];
|
||||
/** a way of encoding the choice of TibetanMachine font from
|
||||
that family of 5 fonts: */
|
||||
private static SimpleAttributeSet[] normFontAttributeSet = new SimpleAttributeSet[6];
|
||||
private static boolean hasDisambiguatingKey; //to disambiguate gy and g.y=
|
||||
private static char disambiguating_key;
|
||||
private static boolean hasSanskritStackingKey; //for stacking Sanskrit
|
||||
|
@ -244,6 +249,12 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
StyleConstants.setFontFamily(webFontAttributeSet[i],tmwFontNames[i]);
|
||||
}
|
||||
|
||||
normFontAttributeSet[0] = null;
|
||||
for (int i=1; i<normFontAttributeSet.length; i++) {
|
||||
normFontAttributeSet[i] = new SimpleAttributeSet();
|
||||
StyleConstants.setFontFamily(normFontAttributeSet[i],tmFontNames[i]);
|
||||
}
|
||||
|
||||
StringTokenizer sTok;
|
||||
topSet = new HashSet();
|
||||
leftSet = new HashSet();
|
||||
|
@ -560,6 +571,23 @@ public static SimpleAttributeSet getAttributeSet(int font) {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the AttributeSet for the given TibetanMachine font.
|
||||
* This information is required in order to be able to put styled
|
||||
* text into {@link TibetanDocument TibetanDocument}.
|
||||
* @param font the number of the TibetanMachine font for which
|
||||
* you want the SimpleAttributeSet: TibetanMachine = 1,
|
||||
* TibetanMachineSkt1 = 2, etc. up to 5
|
||||
* @return a SimpleAttributeSet for the given font - that is,
|
||||
* a way of encoding the font itself
|
||||
*/
|
||||
public static SimpleAttributeSet getAttributeSetTM(int font) {
|
||||
if (font > -1 && font < normFontAttributeSet.length)
|
||||
return normFontAttributeSet[font];
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Says whether or not the character is formatting.
|
||||
* @param c the character to be checked
|
||||
|
@ -851,6 +879,10 @@ public static DuffCode getHalfHeightGlyph(String hashKey) {
|
|||
return dc[REDUCED_C];
|
||||
}
|
||||
|
||||
private static final DuffCode TMW_cr = new DuffCode(1, '\r');
|
||||
private static final DuffCode TMW_lf = new DuffCode(1, '\n');
|
||||
private static final DuffCode TMW_tab = new DuffCode(1, '\t');
|
||||
|
||||
/** Returns the DuffCode for the TibetanMachineWeb glyph corresponding
|
||||
to the given TibetanMachine font
|
||||
(0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254).
|
||||
|
@ -861,14 +893,36 @@ public static DuffCode getHalfHeightGlyph(String hashKey) {
|
|||
an existing TibetanMachine glyph, null is returned. In general,
|
||||
though, this method may raise a runtime exception if you pass in a
|
||||
(font, ord) that doesn't correspond to an existing TibetanMachine
|
||||
glyph. */
|
||||
public static DuffCode mapTMtoTMW(int font, int ordinal) {
|
||||
glyph.
|
||||
|
||||
Only a few control characters are supported: '\r' (carriage
|
||||
return), '\n' (line feed), and '\t' (tab).
|
||||
*/
|
||||
public static DuffCode mapTMtoTMW(int font, int ordinal)
|
||||
throws ArrayIndexOutOfBoundsException {
|
||||
if (ordinal < 32) {
|
||||
if (ordinal == (int)'\r')
|
||||
return TMW_cr;
|
||||
else if (ordinal == (int)'\n')
|
||||
return TMW_lf;
|
||||
else if (ordinal == (int)'\t')
|
||||
return TMW_tab;
|
||||
else {
|
||||
// for robustness, just return font 1, char ordinal.
|
||||
ThdlDebug.noteIffyCode();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
DuffCode ans = TMtoTMW[font][ordinal-32];
|
||||
// comment this out to test via main(..):
|
||||
ThdlDebug.verify(null != ans);
|
||||
return ans;
|
||||
}
|
||||
|
||||
private static final DuffCode TM_cr = new DuffCode(1, '\r');
|
||||
private static final DuffCode TM_lf = new DuffCode(1, '\n');
|
||||
private static final DuffCode TM_tab = new DuffCode(1, '\t');
|
||||
|
||||
/** Returns the DuffCode for the TibetanMachine glyph corresponding to
|
||||
the given TibetanMachineWeb font
|
||||
(0=TibetanMachineWeb,1=TibetanMachineWeb1,...) and character(32-127).
|
||||
|
@ -880,8 +934,27 @@ public static DuffCode mapTMtoTMW(int font, int ordinal) {
|
|||
existing TibetanMachineWeb glyph, null is returned. In general,
|
||||
though, this method may raise a runtime exception if you pass in a
|
||||
(font, ord) that doesn't correspond to an existing
|
||||
TibetanMachineWeb glyph. */
|
||||
public static DuffCode mapTMWtoTM(int font, int ordinal) {
|
||||
TibetanMachineWeb glyph.
|
||||
|
||||
Only a few control characters are supported: '\r' (carriage
|
||||
return), '\n' (line feed), and '\t' (tab).
|
||||
|
||||
*/
|
||||
public static DuffCode mapTMWtoTM(int font, int ordinal)
|
||||
throws ArrayIndexOutOfBoundsException {
|
||||
if (ordinal < 32) {
|
||||
if (ordinal == (int)'\r')
|
||||
return TM_cr;
|
||||
else if (ordinal == (int)'\n')
|
||||
return TM_lf;
|
||||
else if (ordinal == (int)'\t')
|
||||
return TM_tab;
|
||||
else {
|
||||
// unsupported control character: note it and return null (no equivalent).
|
||||
ThdlDebug.noteIffyCode();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
DuffCode ans = TMWtoTM[font][ordinal-32];
|
||||
// comment this out to test via main(..):
|
||||
ThdlDebug.verify(null != ans || (font == 7 && ordinal == 91));
|
||||
|
@ -975,7 +1048,13 @@ private static DuffCode getTMtoTMW(int font, int code) {
|
|||
return TMtoTMW[font][code];
|
||||
}
|
||||
|
||||
private static int getTMFontNumber(String name) {
|
||||
/**
|
||||
* Gets the TibetanMachine font number for this font name.
|
||||
* @param name a font name
|
||||
* @return between 1 and 5 if the font is one
|
||||
* of the TibetanMachine fonts, otherwise 0
|
||||
*/
|
||||
public static int getTMFontNumber(String name) {
|
||||
String internedName = name.intern();
|
||||
for (int i=1; i<tmFontNames.length; i++) {
|
||||
if (internedName == tmFontNames[i])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue