EWTS->TMW fixes. Wowel handling still isn't perfect but I'm lazy.

Jskad now uses the new EWTS->TMW routine, not the old, and thus the
"(Buggy)" label is [unfairly, perhaps] dropped.
This commit is contained in:
dchandler 2005-07-07 01:30:03 +00:00
parent 0f99c402df
commit 982350371d
7 changed files with 129 additions and 86 deletions

View File

@ -1628,33 +1628,17 @@ public void paste(int offset)
*/
public void toTibetanMachineWeb(String wylie, int offset) {
try {
StringTokenizer sTok = new StringTokenizer(wylie, "\n\t", true); // FIXME does this work on all platforms?
while (sTok.hasMoreTokens()) {
String next = sTok.nextToken();
if (next.equals("\n") || next.equals("\t")) { // FIXME does this work on all platforms?
try {
getTibDoc().insertString(offset, next, null);
offset++;
} catch (BadLocationException ble) {
ble.printStackTrace();
ThdlDebug.noteIffyCode();
}
} else {
DuffData[] dd = TibTextUtils.getTibetanMachineWebForEWTS(next);
offset = getTibDoc().insertDuff(offset, dd);
}
}
TibTextUtils.insertTibetanMachineWebForTranslit(
true, wylie, getTibDoc(), offset,
false // warnings?
);
} catch (InvalidTransliterationException ite) {
JOptionPane.showMessageDialog(
this,
"The transliteration you are trying to convert is invalid:\n"
+ ite.getMessage());
return;
}
catch (InvalidWylieException iwe) {
JOptionPane.showMessageDialog(this,
"The Wylie you are trying to convert is invalid, " +
"beginning from:\n " + iwe.getCulpritInContext() + "\n" +
"The culprit is probably the character '"+iwe.getCulprit()+"'.");
}
catch (Exception e)
{
System.err.println("Could not convert: " + wylie);
}
}
/**
@ -1701,30 +1685,16 @@ public void paste(int offset)
if ((0 != TibetanMachineWeb.getTMWFontNumber(fontName)) || i==endPos.getOffset()) {
if (i != start) {
try {
DuffData[] duffdata = null;
if (fromACIP) {
getTibDoc().remove(start, i-start);
i += -1 /* because i++ will occur */
+ TibTextUtils.insertTibetanMachineWebForACIP(sb.toString(),
getTibDoc(),
start,
withWarnings);
} else
duffdata = TibTextUtils.getTibetanMachineWebForEWTS(sb.toString());
if (!fromACIP) {
getTibDoc().remove(start, i-start);
getTibDoc().insertDuff(start, duffdata);
}
} catch (InvalidWylieException iwe) {
JOptionPane.showMessageDialog(this,
"The Wylie you are trying to convert is invalid, " +
"beginning from:\n " + iwe.getCulpritInContext() +
"\nThe culprit is probably the character '" +
iwe.getCulprit() + "'.");
return;
} catch (InvalidACIPException iae) {
JOptionPane.showMessageDialog(this,
"The ACIP you are trying to convert is invalid:\n" + iae.getMessage());
getTibDoc().remove(start, i-start);
i += -1 /* because i++ will occur */
+ TibTextUtils.insertTibetanMachineWebForTranslit(
!fromACIP, sb.toString(), getTibDoc(),
start, withWarnings);
} catch (InvalidTransliterationException ite) {
JOptionPane.showMessageDialog(
this,
"The transliteration you are trying to convert is invalid:\n"
+ ite.getMessage());
return;
}
}

View File

@ -503,15 +503,22 @@ public class Jskad extends JPanel implements DocumentListener {
}
});
convertSelectionMenu.add(TMWACIPItem);
toolsMenu.add(convertSelectionMenu);
JMenuItem wylieTMWItem = new JMenuItem("(Buggy) Convert Wylie to Tibetan Machine Web (non-Unicode)");
JMenuItem wylieTMWItem = new JMenuItem("Convert Wylie to Tibetan Machine Web (non-Unicode) (no warnings)");
wylieTMWItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
toTibetan(false, false);
}
});
convertSelectionMenu.add(wylieTMWItem);
JMenuItem wylieTMWWarningsItem = new JMenuItem("Convert Wylie to Tibetan Machine Web (non-Unicode) (pedantic warnings)");
wylieTMWWarningsItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
toTibetan(false, true);
}
});
convertSelectionMenu.add(wylieTMWItem);
convertSelectionMenu.add(wylieTMWWarningsItem);
JMenuItem ACIPTMWItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (no warnings)");
ACIPTMWItem.addActionListener(new ThdlActionListener() {
@ -521,7 +528,7 @@ public class Jskad extends JPanel implements DocumentListener {
});
convertSelectionMenu.add(ACIPTMWItem);
JMenuItem ACIPTMWWarnItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (with pedantic warnings)");
JMenuItem ACIPTMWWarnItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (pedantic warnings)");
ACIPTMWWarnItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
toTibetan(true, true);

View File

@ -19,17 +19,17 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text;
/**
* An exception thrown whenever ACIP->TMW conversion in the Jskad GUI
* runs into invalid ACIP.
* An exception thrown whenever an EWTS->TMW or ACIP->TMW conversion in
* the Jskad GUI runs into an invalid transliteration string.
* @author David Chandler */
public class InvalidACIPException extends Exception {
public class InvalidTransliterationException extends Exception {
private String error;
/**
* Creates an InvalidACIPException.
* Creates an InvalidTransliterationException.
* @param s an error message
*/
public InvalidACIPException(String s) {
public InvalidTransliterationException(String s) {
error = s;
}

View File

@ -22,7 +22,15 @@ package org.thdl.tib.text;
* @see TibetanMachineWeb */
public interface THDLWylieConstants {
/**
* the Wylie for bindu/anusvara
* the Wylie for U+0F82
*/
public static final String U0F82 = "~M`";
/**
* the Wylie for U+0F83
*/
public static final String U0F83 = "~M";
/**
* the Wylie for bindu/anusvara (U+0F7E)
*/
public static final char BINDU = 'M';
/**
@ -52,6 +60,10 @@ public interface THDLWylieConstants {
*/
public static final String WYLIE_aVOWEL = "a";
/**
* the Wylie for U+0F39
*/
public static final String WYLIE_TSA_PHRU = "^";
/**
* the Wylie for achung
*/
public static final char ACHUNG_character = '\'';

View File

@ -25,7 +25,9 @@ import javax.swing.text.rtf.RTFEditorKit;
import java.io.*;
import org.thdl.util.ThdlDebug;
import org.thdl.tib.text.ttt.TTraits;
import org.thdl.tib.text.ttt.ACIPTraits;
import org.thdl.tib.text.ttt.EWTSTraits;
import org.thdl.tib.text.ttt.TConverter;
import org.thdl.tib.text.tshegbar.LegalTshegBar;
import org.thdl.tib.text.tshegbar.UnicodeConstants;
@ -312,34 +314,44 @@ public class TibTextUtils implements THDLWylieConstants {
= new boolean[] { false };
/**
* Converts a string of ACIP into TibetanMachineWeb and inserts that
* into tdoc at offset loc.
* @param acip the ACIP you want to convert
* Converts a string of transliteration into TibetanMachineWeb and
* inserts that into tdoc at offset loc.
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
* you want ACIP
* @param translit the transliteration you want to convert
* @param tdoc the document in which to insert the TMW
* @param loc the offset inside the document at which to insert the TMW
* @param withWarnings true if and only if you want warnings to appear
* in the output, such as "this could be a mistranscription of blah..."
* @throws InvalidACIPException if the ACIP is deemed invalid, i.e. if
* it does not conform to the ACIP transcription rules (those in the
* official document and the subtler rules pieced together by David
* Chandler through study and private correspondence with Robert
* Chilton)
* @throws InvalidTransliterationException if the transliteration is
* deemed invalid, i.e. if it does not conform to the transcription
* rules (those in the official document and the subtler rules pieced
* together by David Chandler through study and private correspondence
* with Robert Chilton (for ACIP), Than Garson, David Germano, Chris
* Fynn, and others)
* @return the number of characters inserted into tdoc */
public static int insertTibetanMachineWebForACIP(String acip,
TibetanDocument tdoc,
int loc,
boolean withWarnings)
throws InvalidACIPException
public static int insertTibetanMachineWebForTranslit(boolean EWTSNotACIP,
String translit,
TibetanDocument tdoc,
int loc,
boolean withWarnings)
throws InvalidTransliterationException
{
StringBuffer errors = new StringBuffer();
String warningLevel = withWarnings ? "All" : "None";
ArrayList al = ACIPTraits.instance().scanner().scan(acip, errors, 500,
false, warningLevel);
TTraits traits = (EWTSNotACIP
? (TTraits)EWTSTraits.instance()
: (TTraits)ACIPTraits.instance());
ArrayList al = traits.scanner().scan(translit, errors, 500,
false, warningLevel);
if (null == al || errors.length() > 0) {
if (errors.length() > 0)
throw new InvalidACIPException(errors.toString());
throw new InvalidTransliterationException(errors.toString());
else
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
throw new InvalidTransliterationException("Fatal error converting "
+ traits.shortTranslitName()
+ " to TMW.");
}
boolean colors = withWarnings;
boolean putWarningsInOutput = false;
@ -348,7 +360,7 @@ public class TibTextUtils implements THDLWylieConstants {
}
try {
int tloc[] = new int[] { loc };
TConverter.convertToTMW(ACIPTraits.instance(), al, tdoc, null, null,
TConverter.convertToTMW(traits, al, tdoc, null, null,
null, putWarningsInOutput, warningLevel,
false, colors, tloc);
return tloc[0] - loc;
@ -364,8 +376,13 @@ public class TibTextUtils implements THDLWylieConstants {
* corresponding to the Wylie text
* @throws InvalidWylieException if the Wylie is deemed invalid,
* i.e. if it does not conform to the Extended Wylie standard
* @deprecated Use insertTibetanMachineWebForTranslit instead.
*/
public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException {
ThdlDebug.noteIffyCode(); // deprecated method!
// TODO(dchandler): remove it and
// hopefully a ton of code that
// only it uses.
List chars = new ArrayList();
DuffCode dc;
int start = 0;

View File

@ -79,6 +79,11 @@ public class EWTSTest extends TestCase {
/** Causes a JUnit test case failure unless the EWTS document ewts
* converts to the Unicode string expectedUnicode. */
static void ewts2uni_test(String ewts, String expectedUnicode) {
// TODO(DLC)[EWTS->Tibetan]: In addition to what this
// currently does, have this function convert to TMW and
// convert that TMW to Unicode and verify that the result is
// the same. Almost every call should allow for that.
StringBuffer errors = new StringBuffer();
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
ewts, errors,

View File

@ -164,6 +164,10 @@ public final class EWTSTraits implements TTraits {
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
// TODO(DLC)[EWTS->Tibetan]: kai doesn't work.
// Order matters here.
boolean context_added[] = new boolean[] { false };
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) {
@ -183,11 +187,7 @@ public final class EWTSTraits implements TTraits {
}
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
} else if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
@ -198,7 +198,9 @@ public final class EWTSTraits implements TTraits {
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
}
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
@ -209,7 +211,12 @@ public final class EWTSTraits implements TTraits {
}
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
if (wowel.indexOf('M') >= 0) {
if (wowel.indexOf(THDLWylieConstants.BINDU) >= 0
// TODO(DLC)[EWTS->Tibetan]: This is really ugly... we
// rely on the fact that we know every Wylie wowel that
// contains 'M'. Let's, instead, parse the wowel.
&& wowel.indexOf(THDLWylieConstants.U0F82) < 0
&& wowel.indexOf(THDLWylieConstants.U0F83) < 0) {
DuffCode last = null;
if (!context_added[0]) {
last = preceding;
@ -219,10 +226,35 @@ public final class EWTSTraits implements TTraits {
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
}
TibTextUtils.getBindu(duff, last);
context_added[0] = true;
}
if (!context_added[0]) {
duff.add(preceding);
}
if (wowel.indexOf('H') >= 0)
duff.add(TibetanMachineWeb.getGlyph("H"));
int ix;
if ((ix = wowel.indexOf(THDLWylieConstants.WYLIE_TSA_PHRU)) >= 0) {
// This likely won't look good! TMW has glyphs for [va]
// and [fa], so use that transliteration if you care, not
// [ph^] or [b^].
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.WYLIE_TSA_PHRU));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.WYLIE_TSA_PHRU.length(), "");
wowel = sb.toString();
}
if ((ix = wowel.indexOf(THDLWylieConstants.U0F82)) >= 0) {
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F82));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.U0F82.length(), "");
wowel = sb.toString();
}
if ((ix = wowel.indexOf(THDLWylieConstants.U0F83)) >= 0) {
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F83));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.U0F83.length(), "");
wowel = sb.toString();
}
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! ACIP does that. E.g., I think we screw up 'jam~X.