EWTS->TMW fixes. Wowel handling still isn't perfect but I'm lazy.

Jskad now uses the new EWTS->TMW routine, not the old, and thus the
"(Buggy)" label is [unfairly, perhaps] dropped.
This commit is contained in:
dchandler 2005-07-07 01:30:03 +00:00
parent 0f99c402df
commit 982350371d
7 changed files with 129 additions and 86 deletions

View file

@ -1628,33 +1628,17 @@ public void paste(int offset)
*/ */
public void toTibetanMachineWeb(String wylie, int offset) { public void toTibetanMachineWeb(String wylie, int offset) {
try { try {
StringTokenizer sTok = new StringTokenizer(wylie, "\n\t", true); // FIXME does this work on all platforms? TibTextUtils.insertTibetanMachineWebForTranslit(
while (sTok.hasMoreTokens()) { true, wylie, getTibDoc(), offset,
String next = sTok.nextToken(); false // warnings?
if (next.equals("\n") || next.equals("\t")) { // FIXME does this work on all platforms? );
try { } catch (InvalidTransliterationException ite) {
getTibDoc().insertString(offset, next, null); JOptionPane.showMessageDialog(
offset++; this,
} catch (BadLocationException ble) { "The transliteration you are trying to convert is invalid:\n"
ble.printStackTrace(); + ite.getMessage());
ThdlDebug.noteIffyCode(); return;
}
} else {
DuffData[] dd = TibTextUtils.getTibetanMachineWebForEWTS(next);
offset = getTibDoc().insertDuff(offset, dd);
}
}
} }
catch (InvalidWylieException iwe) {
JOptionPane.showMessageDialog(this,
"The Wylie you are trying to convert is invalid, " +
"beginning from:\n " + iwe.getCulpritInContext() + "\n" +
"The culprit is probably the character '"+iwe.getCulprit()+"'.");
}
catch (Exception e)
{
System.err.println("Could not convert: " + wylie);
}
} }
/** /**
@ -1701,30 +1685,16 @@ public void paste(int offset)
if ((0 != TibetanMachineWeb.getTMWFontNumber(fontName)) || i==endPos.getOffset()) { if ((0 != TibetanMachineWeb.getTMWFontNumber(fontName)) || i==endPos.getOffset()) {
if (i != start) { if (i != start) {
try { try {
DuffData[] duffdata = null; getTibDoc().remove(start, i-start);
if (fromACIP) { i += -1 /* because i++ will occur */
getTibDoc().remove(start, i-start); + TibTextUtils.insertTibetanMachineWebForTranslit(
i += -1 /* because i++ will occur */ !fromACIP, sb.toString(), getTibDoc(),
+ TibTextUtils.insertTibetanMachineWebForACIP(sb.toString(), start, withWarnings);
getTibDoc(), } catch (InvalidTransliterationException ite) {
start, JOptionPane.showMessageDialog(
withWarnings); this,
} else "The transliteration you are trying to convert is invalid:\n"
duffdata = TibTextUtils.getTibetanMachineWebForEWTS(sb.toString()); + ite.getMessage());
if (!fromACIP) {
getTibDoc().remove(start, i-start);
getTibDoc().insertDuff(start, duffdata);
}
} catch (InvalidWylieException iwe) {
JOptionPane.showMessageDialog(this,
"The Wylie you are trying to convert is invalid, " +
"beginning from:\n " + iwe.getCulpritInContext() +
"\nThe culprit is probably the character '" +
iwe.getCulprit() + "'.");
return;
} catch (InvalidACIPException iae) {
JOptionPane.showMessageDialog(this,
"The ACIP you are trying to convert is invalid:\n" + iae.getMessage());
return; return;
} }
} }

View file

@ -503,15 +503,22 @@ public class Jskad extends JPanel implements DocumentListener {
} }
}); });
convertSelectionMenu.add(TMWACIPItem); convertSelectionMenu.add(TMWACIPItem);
toolsMenu.add(convertSelectionMenu);
JMenuItem wylieTMWItem = new JMenuItem("(Buggy) Convert Wylie to Tibetan Machine Web (non-Unicode)"); JMenuItem wylieTMWItem = new JMenuItem("Convert Wylie to Tibetan Machine Web (non-Unicode) (no warnings)");
wylieTMWItem.addActionListener(new ThdlActionListener() { wylieTMWItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
toTibetan(false, false);
}
});
convertSelectionMenu.add(wylieTMWItem);
JMenuItem wylieTMWWarningsItem = new JMenuItem("Convert Wylie to Tibetan Machine Web (non-Unicode) (pedantic warnings)");
wylieTMWWarningsItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
toTibetan(false, true); toTibetan(false, true);
} }
}); });
convertSelectionMenu.add(wylieTMWItem); convertSelectionMenu.add(wylieTMWWarningsItem);
JMenuItem ACIPTMWItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (no warnings)"); JMenuItem ACIPTMWItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (no warnings)");
ACIPTMWItem.addActionListener(new ThdlActionListener() { ACIPTMWItem.addActionListener(new ThdlActionListener() {
@ -521,7 +528,7 @@ public class Jskad extends JPanel implements DocumentListener {
}); });
convertSelectionMenu.add(ACIPTMWItem); convertSelectionMenu.add(ACIPTMWItem);
JMenuItem ACIPTMWWarnItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (with pedantic warnings)"); JMenuItem ACIPTMWWarnItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (pedantic warnings)");
ACIPTMWWarnItem.addActionListener(new ThdlActionListener() { ACIPTMWWarnItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
toTibetan(true, true); toTibetan(true, true);

View file

@ -19,17 +19,17 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text; package org.thdl.tib.text;
/** /**
* An exception thrown whenever ACIP->TMW conversion in the Jskad GUI * An exception thrown whenever an EWTS->TMW or ACIP->TMW conversion in
* runs into invalid ACIP. * the Jskad GUI runs into an invalid transliteration string.
* @author David Chandler */ * @author David Chandler */
public class InvalidACIPException extends Exception { public class InvalidTransliterationException extends Exception {
private String error; private String error;
/** /**
* Creates an InvalidACIPException. * Creates an InvalidTransliterationException.
* @param s an error message * @param s an error message
*/ */
public InvalidACIPException(String s) { public InvalidTransliterationException(String s) {
error = s; error = s;
} }

View file

@ -22,7 +22,15 @@ package org.thdl.tib.text;
* @see TibetanMachineWeb */ * @see TibetanMachineWeb */
public interface THDLWylieConstants { public interface THDLWylieConstants {
/** /**
* the Wylie for bindu/anusvara * the Wylie for U+0F82
*/
public static final String U0F82 = "~M`";
/**
* the Wylie for U+0F83
*/
public static final String U0F83 = "~M";
/**
* the Wylie for bindu/anusvara (U+0F7E)
*/ */
public static final char BINDU = 'M'; public static final char BINDU = 'M';
/** /**
@ -52,6 +60,10 @@ public interface THDLWylieConstants {
*/ */
public static final String WYLIE_aVOWEL = "a"; public static final String WYLIE_aVOWEL = "a";
/** /**
* the Wylie for U+0F39
*/
public static final String WYLIE_TSA_PHRU = "^";
/**
* the Wylie for achung * the Wylie for achung
*/ */
public static final char ACHUNG_character = '\''; public static final char ACHUNG_character = '\'';

View file

@ -25,7 +25,9 @@ import javax.swing.text.rtf.RTFEditorKit;
import java.io.*; import java.io.*;
import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlDebug;
import org.thdl.tib.text.ttt.TTraits;
import org.thdl.tib.text.ttt.ACIPTraits; import org.thdl.tib.text.ttt.ACIPTraits;
import org.thdl.tib.text.ttt.EWTSTraits;
import org.thdl.tib.text.ttt.TConverter; import org.thdl.tib.text.ttt.TConverter;
import org.thdl.tib.text.tshegbar.LegalTshegBar; import org.thdl.tib.text.tshegbar.LegalTshegBar;
import org.thdl.tib.text.tshegbar.UnicodeConstants; import org.thdl.tib.text.tshegbar.UnicodeConstants;
@ -312,34 +314,44 @@ public class TibTextUtils implements THDLWylieConstants {
= new boolean[] { false }; = new boolean[] { false };
/** /**
* Converts a string of ACIP into TibetanMachineWeb and inserts that * Converts a string of transliteration into TibetanMachineWeb and
* into tdoc at offset loc. * inserts that into tdoc at offset loc.
* @param acip the ACIP you want to convert * @param EWTSNotACIP true if you want THDL Extended Wylie, false if
* you want ACIP
* @param translit the transliteration you want to convert
* @param tdoc the document in which to insert the TMW * @param tdoc the document in which to insert the TMW
* @param loc the offset inside the document at which to insert the TMW * @param loc the offset inside the document at which to insert the TMW
* @param withWarnings true if and only if you want warnings to appear * @param withWarnings true if and only if you want warnings to appear
* in the output, such as "this could be a mistranscription of blah..." * in the output, such as "this could be a mistranscription of blah..."
* @throws InvalidACIPException if the ACIP is deemed invalid, i.e. if * @throws InvalidTransliterationException if the transliteration is
* it does not conform to the ACIP transcription rules (those in the * deemed invalid, i.e. if it does not conform to the transcription
* official document and the subtler rules pieced together by David * rules (those in the official document and the subtler rules pieced
* Chandler through study and private correspondence with Robert * together by David Chandler through study and private correspondence
* Chilton) * with Robert Chilton (for ACIP), Than Garson, David Germano, Chris
* Fynn, and others)
* @return the number of characters inserted into tdoc */ * @return the number of characters inserted into tdoc */
public static int insertTibetanMachineWebForACIP(String acip, public static int insertTibetanMachineWebForTranslit(boolean EWTSNotACIP,
TibetanDocument tdoc, String translit,
int loc, TibetanDocument tdoc,
boolean withWarnings) int loc,
throws InvalidACIPException boolean withWarnings)
throws InvalidTransliterationException
{ {
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
String warningLevel = withWarnings ? "All" : "None"; String warningLevel = withWarnings ? "All" : "None";
ArrayList al = ACIPTraits.instance().scanner().scan(acip, errors, 500,
false, warningLevel); TTraits traits = (EWTSNotACIP
? (TTraits)EWTSTraits.instance()
: (TTraits)ACIPTraits.instance());
ArrayList al = traits.scanner().scan(translit, errors, 500,
false, warningLevel);
if (null == al || errors.length() > 0) { if (null == al || errors.length() > 0) {
if (errors.length() > 0) if (errors.length() > 0)
throw new InvalidACIPException(errors.toString()); throw new InvalidTransliterationException(errors.toString());
else else
throw new InvalidACIPException("Fatal error converting ACIP to TMW."); throw new InvalidTransliterationException("Fatal error converting "
+ traits.shortTranslitName()
+ " to TMW.");
} }
boolean colors = withWarnings; boolean colors = withWarnings;
boolean putWarningsInOutput = false; boolean putWarningsInOutput = false;
@ -348,7 +360,7 @@ public class TibTextUtils implements THDLWylieConstants {
} }
try { try {
int tloc[] = new int[] { loc }; int tloc[] = new int[] { loc };
TConverter.convertToTMW(ACIPTraits.instance(), al, tdoc, null, null, TConverter.convertToTMW(traits, al, tdoc, null, null,
null, putWarningsInOutput, warningLevel, null, putWarningsInOutput, warningLevel,
false, colors, tloc); false, colors, tloc);
return tloc[0] - loc; return tloc[0] - loc;
@ -364,8 +376,13 @@ public class TibTextUtils implements THDLWylieConstants {
* corresponding to the Wylie text * corresponding to the Wylie text
* @throws InvalidWylieException if the Wylie is deemed invalid, * @throws InvalidWylieException if the Wylie is deemed invalid,
* i.e. if it does not conform to the Extended Wylie standard * i.e. if it does not conform to the Extended Wylie standard
* @deprecated by insertTibetanMachineWebForTranslit
*/ */
public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException { public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException {
ThdlDebug.noteIffyCode(); // deprecated method!
// TODO(dchandler): remove it and
// hopefully a ton of code that
// only it uses.
List chars = new ArrayList(); List chars = new ArrayList();
DuffCode dc; DuffCode dc;
int start = 0; int start = 0;

View file

@ -79,6 +79,11 @@ public class EWTSTest extends TestCase {
/** Causes a JUnit test case failure unless the EWTS document ewts /** Causes a JUnit test case failure unless the EWTS document ewts
* converts to the unicode expectedUnicode. */ * converts to the unicode expectedUnicode. */
static void ewts2uni_test(String ewts, String expectedUnicode) { static void ewts2uni_test(String ewts, String expectedUnicode) {
// TODO(DLC)[EWTS->Tibetan]: In addition to what this
// currently does, have this function convert to TMW and
// convert that TMW to Unicode and verify that the result is
// the same. Almost every call should allow for that.
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(), String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
ewts, errors, ewts, errors,

View file

@ -164,6 +164,10 @@ public final class EWTSTraits implements TTraits {
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test. // TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
// TODO(DLC)[EWTS->Tibetan]: kai doesn't work.
// Order matters here. // Order matters here.
boolean context_added[] = new boolean[] { false }; boolean context_added[] = new boolean[] { false };
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) { if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) {
@ -183,11 +187,7 @@ public final class EWTSTraits implements TTraits {
} }
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) { if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
} } else if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) { } else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
@ -198,7 +198,9 @@ public final class EWTSTraits implements TTraits {
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) { if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
} }
if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) { if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
} }
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
@ -209,7 +211,12 @@ public final class EWTSTraits implements TTraits {
} }
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate. // FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
if (wowel.indexOf('M') >= 0) { if (wowel.indexOf(THDLWylieConstants.BINDU) >= 0
// TODO(DLC)[EWTS->Tibetan]: This is really ugly... we
// rely on the fact that we know every Wylie wowel that
// contains 'M'. Let's, instead, parse the wowel.
&& wowel.indexOf(THDLWylieConstants.U0F82) < 0
&& wowel.indexOf(THDLWylieConstants.U0F83) < 0) {
DuffCode last = null; DuffCode last = null;
if (!context_added[0]) { if (!context_added[0]) {
last = preceding; last = preceding;
@ -219,10 +226,35 @@ public final class EWTSTraits implements TTraits {
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone??? // TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
} }
TibTextUtils.getBindu(duff, last); TibTextUtils.getBindu(duff, last);
context_added[0] = true;
}
if (!context_added[0]) {
duff.add(preceding);
} }
if (wowel.indexOf('H') >= 0) if (wowel.indexOf('H') >= 0)
duff.add(TibetanMachineWeb.getGlyph("H")); duff.add(TibetanMachineWeb.getGlyph("H"));
int ix;
if ((ix = wowel.indexOf(THDLWylieConstants.WYLIE_TSA_PHRU)) >= 0) {
// This likely won't look good! TMW has glyphs for [va]
// and [fa], so use that transliteration if you care, not
// [ph^] or [b^].
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.WYLIE_TSA_PHRU));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.WYLIE_TSA_PHRU.length(), "");
wowel = sb.toString();
}
if ((ix = wowel.indexOf(THDLWylieConstants.U0F82)) >= 0) {
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F82));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.U0F82.length(), "");
wowel = sb.toString();
}
if ((ix = wowel.indexOf(THDLWylieConstants.U0F83)) >= 0) {
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F83));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.U0F83.length(), "");
wowel = sb.toString();
}
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g. // TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g.