EWTS->TMW fixes. Wowel handling still isn't perfect but I'm lazy.
Jskad now uses the new EWTS->TMW routine, not the old, and thus the "(Buggy)" label is [unfairly, perhaps] dropped.
This commit is contained in:
parent
0f99c402df
commit
982350371d
7 changed files with 129 additions and 86 deletions
|
@ -1628,33 +1628,17 @@ public void paste(int offset)
|
|||
*/
|
||||
public void toTibetanMachineWeb(String wylie, int offset) {
|
||||
try {
|
||||
StringTokenizer sTok = new StringTokenizer(wylie, "\n\t", true); // FIXME does this work on all platforms?
|
||||
while (sTok.hasMoreTokens()) {
|
||||
String next = sTok.nextToken();
|
||||
if (next.equals("\n") || next.equals("\t")) { // FIXME does this work on all platforms?
|
||||
try {
|
||||
getTibDoc().insertString(offset, next, null);
|
||||
offset++;
|
||||
} catch (BadLocationException ble) {
|
||||
ble.printStackTrace();
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
} else {
|
||||
DuffData[] dd = TibTextUtils.getTibetanMachineWebForEWTS(next);
|
||||
offset = getTibDoc().insertDuff(offset, dd);
|
||||
}
|
||||
}
|
||||
TibTextUtils.insertTibetanMachineWebForTranslit(
|
||||
true, wylie, getTibDoc(), offset,
|
||||
false // warnings?
|
||||
);
|
||||
} catch (InvalidTransliterationException ite) {
|
||||
JOptionPane.showMessageDialog(
|
||||
this,
|
||||
"The transliteration you are trying to convert is invalid:\n"
|
||||
+ ite.getMessage());
|
||||
return;
|
||||
}
|
||||
catch (InvalidWylieException iwe) {
|
||||
JOptionPane.showMessageDialog(this,
|
||||
"The Wylie you are trying to convert is invalid, " +
|
||||
"beginning from:\n " + iwe.getCulpritInContext() + "\n" +
|
||||
"The culprit is probably the character '"+iwe.getCulprit()+"'.");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
System.err.println("Could not convert: " + wylie);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1701,30 +1685,16 @@ public void paste(int offset)
|
|||
if ((0 != TibetanMachineWeb.getTMWFontNumber(fontName)) || i==endPos.getOffset()) {
|
||||
if (i != start) {
|
||||
try {
|
||||
DuffData[] duffdata = null;
|
||||
if (fromACIP) {
|
||||
getTibDoc().remove(start, i-start);
|
||||
i += -1 /* because i++ will occur */
|
||||
+ TibTextUtils.insertTibetanMachineWebForACIP(sb.toString(),
|
||||
getTibDoc(),
|
||||
start,
|
||||
withWarnings);
|
||||
} else
|
||||
duffdata = TibTextUtils.getTibetanMachineWebForEWTS(sb.toString());
|
||||
if (!fromACIP) {
|
||||
getTibDoc().remove(start, i-start);
|
||||
getTibDoc().insertDuff(start, duffdata);
|
||||
}
|
||||
} catch (InvalidWylieException iwe) {
|
||||
JOptionPane.showMessageDialog(this,
|
||||
"The Wylie you are trying to convert is invalid, " +
|
||||
"beginning from:\n " + iwe.getCulpritInContext() +
|
||||
"\nThe culprit is probably the character '" +
|
||||
iwe.getCulprit() + "'.");
|
||||
return;
|
||||
} catch (InvalidACIPException iae) {
|
||||
JOptionPane.showMessageDialog(this,
|
||||
"The ACIP you are trying to convert is invalid:\n" + iae.getMessage());
|
||||
getTibDoc().remove(start, i-start);
|
||||
i += -1 /* because i++ will occur */
|
||||
+ TibTextUtils.insertTibetanMachineWebForTranslit(
|
||||
!fromACIP, sb.toString(), getTibDoc(),
|
||||
start, withWarnings);
|
||||
} catch (InvalidTransliterationException ite) {
|
||||
JOptionPane.showMessageDialog(
|
||||
this,
|
||||
"The transliteration you are trying to convert is invalid:\n"
|
||||
+ ite.getMessage());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -503,15 +503,22 @@ public class Jskad extends JPanel implements DocumentListener {
|
|||
}
|
||||
});
|
||||
convertSelectionMenu.add(TMWACIPItem);
|
||||
toolsMenu.add(convertSelectionMenu);
|
||||
|
||||
JMenuItem wylieTMWItem = new JMenuItem("(Buggy) Convert Wylie to Tibetan Machine Web (non-Unicode)");
|
||||
JMenuItem wylieTMWItem = new JMenuItem("Convert Wylie to Tibetan Machine Web (non-Unicode) (no warnings)");
|
||||
wylieTMWItem.addActionListener(new ThdlActionListener() {
|
||||
public void theRealActionPerformed(ActionEvent e) {
|
||||
toTibetan(false, false);
|
||||
}
|
||||
});
|
||||
convertSelectionMenu.add(wylieTMWItem);
|
||||
|
||||
JMenuItem wylieTMWWarningsItem = new JMenuItem("Convert Wylie to Tibetan Machine Web (non-Unicode) (pedantic warnings)");
|
||||
wylieTMWWarningsItem.addActionListener(new ThdlActionListener() {
|
||||
public void theRealActionPerformed(ActionEvent e) {
|
||||
toTibetan(false, true);
|
||||
}
|
||||
});
|
||||
convertSelectionMenu.add(wylieTMWItem);
|
||||
convertSelectionMenu.add(wylieTMWWarningsItem);
|
||||
|
||||
JMenuItem ACIPTMWItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (no warnings)");
|
||||
ACIPTMWItem.addActionListener(new ThdlActionListener() {
|
||||
|
@ -521,7 +528,7 @@ public class Jskad extends JPanel implements DocumentListener {
|
|||
});
|
||||
convertSelectionMenu.add(ACIPTMWItem);
|
||||
|
||||
JMenuItem ACIPTMWWarnItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (with pedantic warnings)");
|
||||
JMenuItem ACIPTMWWarnItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (pedantic warnings)");
|
||||
ACIPTMWWarnItem.addActionListener(new ThdlActionListener() {
|
||||
public void theRealActionPerformed(ActionEvent e) {
|
||||
toTibetan(true, true);
|
||||
|
|
|
@ -19,17 +19,17 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.tib.text;
|
||||
|
||||
/**
|
||||
* An exception thrown whenever ACIP->TMW conversion in the Jskad GUI
|
||||
* runs into invalid ACIP.
|
||||
* An exception thrown whenever an EWTS->TMW or ACIP->TMW conversion in
|
||||
* the Jskad GUI runs into an invalid transliteration string.
|
||||
* @author David Chandler */
|
||||
public class InvalidACIPException extends Exception {
|
||||
public class InvalidTransliterationException extends Exception {
|
||||
private String error;
|
||||
|
||||
/**
|
||||
* Creates an InvalidACIPException.
|
||||
* Creates an InvalidTransliterationException.
|
||||
* @param s an error message
|
||||
*/
|
||||
public InvalidACIPException(String s) {
|
||||
public InvalidTransliterationException(String s) {
|
||||
error = s;
|
||||
}
|
||||
|
|
@ -22,7 +22,15 @@ package org.thdl.tib.text;
|
|||
* @see TibetanMachineWeb */
|
||||
public interface THDLWylieConstants {
|
||||
/**
|
||||
* the Wylie for bindu/anusvara
|
||||
* the Wylie for U+0F82
|
||||
*/
|
||||
public static final String U0F82 = "~M`";
|
||||
/**
|
||||
* the Wylie for U+0F83
|
||||
*/
|
||||
public static final String U0F83 = "~M";
|
||||
/**
|
||||
* the Wylie for bindu/anusvara (U+0F7E)
|
||||
*/
|
||||
public static final char BINDU = 'M';
|
||||
/**
|
||||
|
@ -52,6 +60,10 @@ public interface THDLWylieConstants {
|
|||
*/
|
||||
public static final String WYLIE_aVOWEL = "a";
|
||||
/**
|
||||
* the Wylie for U+0F39
|
||||
*/
|
||||
public static final String WYLIE_TSA_PHRU = "^";
|
||||
/**
|
||||
* the Wylie for achung
|
||||
*/
|
||||
public static final char ACHUNG_character = '\'';
|
||||
|
|
|
@ -25,7 +25,9 @@ import javax.swing.text.rtf.RTFEditorKit;
|
|||
import java.io.*;
|
||||
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.tib.text.ttt.TTraits;
|
||||
import org.thdl.tib.text.ttt.ACIPTraits;
|
||||
import org.thdl.tib.text.ttt.EWTSTraits;
|
||||
import org.thdl.tib.text.ttt.TConverter;
|
||||
import org.thdl.tib.text.tshegbar.LegalTshegBar;
|
||||
import org.thdl.tib.text.tshegbar.UnicodeConstants;
|
||||
|
@ -312,34 +314,44 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
= new boolean[] { false };
|
||||
|
||||
/**
|
||||
* Converts a string of ACIP into TibetanMachineWeb and inserts that
|
||||
* into tdoc at offset loc.
|
||||
* @param acip the ACIP you want to convert
|
||||
* Converts a string of transliteration into TibetanMachineWeb and
|
||||
* inserts that into tdoc at offset loc.
|
||||
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
|
||||
* you want ACIP
|
||||
* @param translit the transliteration you want to convert
|
||||
* @param tdoc the document in which to insert the TMW
|
||||
* @param loc the offset inside the document at which to insert the TMW
|
||||
* @param withWarnings true if and only if you want warnings to appear
|
||||
* in the output, such as "this could be a mistranscription of blah..."
|
||||
* @throws InvalidACIPException if the ACIP is deemed invalid, i.e. if
|
||||
* it does not conform to the ACIP transcription rules (those in the
|
||||
* official document and the subtler rules pieced together by David
|
||||
* Chandler through study and private correspondence with Robert
|
||||
* Chilton)
|
||||
* @throws InvalidTransliterationException if the transliteration is
|
||||
* deemed invalid, i.e. if it does not conform to the transcription
|
||||
* rules (those in the official document and the subtler rules pieced
|
||||
* together by David Chandler through study and private correspondence
|
||||
* with Robert Chilton (for ACIP), Than Garson, David Germano, Chris
|
||||
* Fynn, and others)
|
||||
* @return the number of characters inserted into tdoc */
|
||||
public static int insertTibetanMachineWebForACIP(String acip,
|
||||
TibetanDocument tdoc,
|
||||
int loc,
|
||||
boolean withWarnings)
|
||||
throws InvalidACIPException
|
||||
public static int insertTibetanMachineWebForTranslit(boolean EWTSNotACIP,
|
||||
String translit,
|
||||
TibetanDocument tdoc,
|
||||
int loc,
|
||||
boolean withWarnings)
|
||||
throws InvalidTransliterationException
|
||||
{
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String warningLevel = withWarnings ? "All" : "None";
|
||||
ArrayList al = ACIPTraits.instance().scanner().scan(acip, errors, 500,
|
||||
false, warningLevel);
|
||||
|
||||
TTraits traits = (EWTSNotACIP
|
||||
? (TTraits)EWTSTraits.instance()
|
||||
: (TTraits)ACIPTraits.instance());
|
||||
ArrayList al = traits.scanner().scan(translit, errors, 500,
|
||||
false, warningLevel);
|
||||
if (null == al || errors.length() > 0) {
|
||||
if (errors.length() > 0)
|
||||
throw new InvalidACIPException(errors.toString());
|
||||
throw new InvalidTransliterationException(errors.toString());
|
||||
else
|
||||
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
|
||||
throw new InvalidTransliterationException("Fatal error converting "
|
||||
+ traits.shortTranslitName()
|
||||
+ " to TMW.");
|
||||
}
|
||||
boolean colors = withWarnings;
|
||||
boolean putWarningsInOutput = false;
|
||||
|
@ -348,7 +360,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
}
|
||||
try {
|
||||
int tloc[] = new int[] { loc };
|
||||
TConverter.convertToTMW(ACIPTraits.instance(), al, tdoc, null, null,
|
||||
TConverter.convertToTMW(traits, al, tdoc, null, null,
|
||||
null, putWarningsInOutput, warningLevel,
|
||||
false, colors, tloc);
|
||||
return tloc[0] - loc;
|
||||
|
@ -364,8 +376,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
* corresponding to the Wylie text
|
||||
* @throws InvalidWylieException if the Wylie is deemed invalid,
|
||||
* i.e. if it does not conform to the Extended Wylie standard
|
||||
* @deprecated Use {@link #insertTibetanMachineWebForTranslit} instead.
|
||||
*/
|
||||
public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException {
|
||||
ThdlDebug.noteIffyCode(); // deprecated method!
|
||||
// TODO(dchandler): remove it and
|
||||
// hopefully a ton of code that
|
||||
// only it uses.
|
||||
List chars = new ArrayList();
|
||||
DuffCode dc;
|
||||
int start = 0;
|
||||
|
|
|
@ -79,6 +79,11 @@ public class EWTSTest extends TestCase {
|
|||
/** Causes a JUnit test case failure unless the EWTS document ewts
|
||||
* converts to the unicode expectedUnicode. */
|
||||
static void ewts2uni_test(String ewts, String expectedUnicode) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: In addition to what this
|
||||
// currently does, have this function convert to TMW and
|
||||
// convert that TMW to Unicode and verify that the result is
|
||||
// the same. Almost every call should allow for that.
|
||||
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
|
||||
ewts, errors,
|
||||
|
|
|
@ -164,6 +164,10 @@ public final class EWTSTraits implements TTraits {
|
|||
|
||||
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: kai doesn't work.
|
||||
|
||||
// Order matters here.
|
||||
boolean context_added[] = new boolean[] { false };
|
||||
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) {
|
||||
|
@ -183,11 +187,7 @@ public final class EWTSTraits implements TTraits {
|
|||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
|
||||
} else if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
|
||||
|
@ -198,7 +198,9 @@ public final class EWTSTraits implements TTraits {
|
|||
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
|
||||
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
|
||||
|
@ -209,7 +211,12 @@ public final class EWTSTraits implements TTraits {
|
|||
}
|
||||
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
|
||||
|
||||
if (wowel.indexOf('M') >= 0) {
|
||||
if (wowel.indexOf(THDLWylieConstants.BINDU) >= 0
|
||||
// TODO(DLC)[EWTS->Tibetan]: This is really ugly... we
|
||||
// rely on the fact that we know every Wylie wowel that
|
||||
// contains 'M'. Let's, instead, parse the wowel.
|
||||
&& wowel.indexOf(THDLWylieConstants.U0F82) < 0
|
||||
&& wowel.indexOf(THDLWylieConstants.U0F83) < 0) {
|
||||
DuffCode last = null;
|
||||
if (!context_added[0]) {
|
||||
last = preceding;
|
||||
|
@ -219,10 +226,35 @@ public final class EWTSTraits implements TTraits {
|
|||
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
|
||||
}
|
||||
TibTextUtils.getBindu(duff, last);
|
||||
context_added[0] = true;
|
||||
}
|
||||
if (!context_added[0]) {
|
||||
duff.add(preceding);
|
||||
}
|
||||
if (wowel.indexOf('H') >= 0)
|
||||
duff.add(TibetanMachineWeb.getGlyph("H"));
|
||||
|
||||
int ix;
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.WYLIE_TSA_PHRU)) >= 0) {
|
||||
// This likely won't look good! TMW has glyphs for [va]
|
||||
// and [fa], so use that transliteration if you care, not
|
||||
// [ph^] or [b^].
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.WYLIE_TSA_PHRU));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.WYLIE_TSA_PHRU.length(), "");
|
||||
wowel = sb.toString();
|
||||
}
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.U0F82)) >= 0) {
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F82));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.U0F82.length(), "");
|
||||
wowel = sb.toString();
|
||||
}
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.U0F83)) >= 0) {
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F83));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.U0F83.length(), "");
|
||||
wowel = sb.toString();
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: Verify that no part of wowel is discarded! ACIP does that. I think we screw up 'jam~X, for example.
|
||||
|
||||
|
|
Loading…
Reference in a new issue