From 982350371d2a0478eab444e81e43fce8d35cad23 Mon Sep 17 00:00:00 2001 From: dchandler Date: Thu, 7 Jul 2005 01:30:03 +0000 Subject: [PATCH] EWTS->TMW fixes. Wowel handling still isn't perfect but I'm lazy. Jskad now uses the new EWTS->TMW routine, not the old, and thus the "(Buggy)" label is [unfairly, perhaps] dropped. --- source/org/thdl/tib/input/DuffPane.java | 70 ++++++------------- source/org/thdl/tib/input/Jskad.java | 15 ++-- ...a => InvalidTransliterationException.java} | 10 +-- .../org/thdl/tib/text/THDLWylieConstants.java | 14 +++- source/org/thdl/tib/text/TibTextUtils.java | 53 +++++++++----- source/org/thdl/tib/text/ttt/EWTSTest.java | 5 ++ source/org/thdl/tib/text/ttt/EWTSTraits.java | 48 ++++++++++--- 7 files changed, 129 insertions(+), 86 deletions(-) rename source/org/thdl/tib/text/{InvalidACIPException.java => InvalidTransliterationException.java} (76%) diff --git a/source/org/thdl/tib/input/DuffPane.java b/source/org/thdl/tib/input/DuffPane.java index 2387127..ce0e9c3 100644 --- a/source/org/thdl/tib/input/DuffPane.java +++ b/source/org/thdl/tib/input/DuffPane.java @@ -1628,33 +1628,17 @@ public void paste(int offset) */ public void toTibetanMachineWeb(String wylie, int offset) { try { - StringTokenizer sTok = new StringTokenizer(wylie, "\n\t", true); // FIXME does this work on all platforms? - while (sTok.hasMoreTokens()) { - String next = sTok.nextToken(); - if (next.equals("\n") || next.equals("\t")) { // FIXME does this work on all platforms? - try { - getTibDoc().insertString(offset, next, null); - offset++; - } catch (BadLocationException ble) { - ble.printStackTrace(); - ThdlDebug.noteIffyCode(); - } - } else { - DuffData[] dd = TibTextUtils.getTibetanMachineWebForEWTS(next); - offset = getTibDoc().insertDuff(offset, dd); - } - } + TibTextUtils.insertTibetanMachineWebForTranslit( + true, wylie, getTibDoc(), offset, + false // warnings? 
+ ); + } catch (InvalidTransliterationException ite) { + JOptionPane.showMessageDialog( + this, + "The transliteration you are trying to convert is invalid:\n" + + ite.getMessage()); + return; } - catch (InvalidWylieException iwe) { - JOptionPane.showMessageDialog(this, - "The Wylie you are trying to convert is invalid, " + - "beginning from:\n " + iwe.getCulpritInContext() + "\n" + - "The culprit is probably the character '"+iwe.getCulprit()+"'."); - } - catch (Exception e) - { - System.err.println("Could not convert: " + wylie); - } } /** @@ -1701,30 +1685,16 @@ public void paste(int offset) if ((0 != TibetanMachineWeb.getTMWFontNumber(fontName)) || i==endPos.getOffset()) { if (i != start) { try { - DuffData[] duffdata = null; - if (fromACIP) { - getTibDoc().remove(start, i-start); - i += -1 /* because i++ will occur */ - + TibTextUtils.insertTibetanMachineWebForACIP(sb.toString(), - getTibDoc(), - start, - withWarnings); - } else - duffdata = TibTextUtils.getTibetanMachineWebForEWTS(sb.toString()); - if (!fromACIP) { - getTibDoc().remove(start, i-start); - getTibDoc().insertDuff(start, duffdata); - } - } catch (InvalidWylieException iwe) { - JOptionPane.showMessageDialog(this, - "The Wylie you are trying to convert is invalid, " + - "beginning from:\n " + iwe.getCulpritInContext() + - "\nThe culprit is probably the character '" + - iwe.getCulprit() + "'."); - return; - } catch (InvalidACIPException iae) { - JOptionPane.showMessageDialog(this, - "The ACIP you are trying to convert is invalid:\n" + iae.getMessage()); + getTibDoc().remove(start, i-start); + i += -1 /* because i++ will occur */ + + TibTextUtils.insertTibetanMachineWebForTranslit( + !fromACIP, sb.toString(), getTibDoc(), + start, withWarnings); + } catch (InvalidTransliterationException ite) { + JOptionPane.showMessageDialog( + this, + "The transliteration you are trying to convert is invalid:\n" + + ite.getMessage()); return; } } diff --git a/source/org/thdl/tib/input/Jskad.java 
b/source/org/thdl/tib/input/Jskad.java index 75da83f..71b7604 100644 --- a/source/org/thdl/tib/input/Jskad.java +++ b/source/org/thdl/tib/input/Jskad.java @@ -503,15 +503,22 @@ public class Jskad extends JPanel implements DocumentListener { } }); convertSelectionMenu.add(TMWACIPItem); - toolsMenu.add(convertSelectionMenu); - JMenuItem wylieTMWItem = new JMenuItem("(Buggy) Convert Wylie to Tibetan Machine Web (non-Unicode)"); + JMenuItem wylieTMWItem = new JMenuItem("Convert Wylie to Tibetan Machine Web (non-Unicode) (no warnings)"); wylieTMWItem.addActionListener(new ThdlActionListener() { + public void theRealActionPerformed(ActionEvent e) { + toTibetan(false, false); + } + }); + convertSelectionMenu.add(wylieTMWItem); + + JMenuItem wylieTMWWarningsItem = new JMenuItem("Convert Wylie to Tibetan Machine Web (non-Unicode) (pedantic warnings)"); + wylieTMWWarningsItem.addActionListener(new ThdlActionListener() { public void theRealActionPerformed(ActionEvent e) { toTibetan(false, true); } }); - convertSelectionMenu.add(wylieTMWItem); + convertSelectionMenu.add(wylieTMWWarningsItem); JMenuItem ACIPTMWItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (no warnings)"); ACIPTMWItem.addActionListener(new ThdlActionListener() { @@ -521,7 +528,7 @@ public class Jskad extends JPanel implements DocumentListener { }); convertSelectionMenu.add(ACIPTMWItem); - JMenuItem ACIPTMWWarnItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (with pedantic warnings)"); + JMenuItem ACIPTMWWarnItem = new JMenuItem("Convert ACIP to Tibetan Machine Web (non-Unicode) (pedantic warnings)"); ACIPTMWWarnItem.addActionListener(new ThdlActionListener() { public void theRealActionPerformed(ActionEvent e) { toTibetan(true, true); diff --git a/source/org/thdl/tib/text/InvalidACIPException.java b/source/org/thdl/tib/text/InvalidTransliterationException.java similarity index 76% rename from source/org/thdl/tib/text/InvalidACIPException.java rename to 
source/org/thdl/tib/text/InvalidTransliterationException.java index 8dc8f9f..b12fcb6 100644 --- a/source/org/thdl/tib/text/InvalidACIPException.java +++ b/source/org/thdl/tib/text/InvalidTransliterationException.java @@ -19,17 +19,17 @@ Contributor(s): ______________________________________. package org.thdl.tib.text; /** -* An exception thrown whenever ACIP->TMW conversion in the Jskad GUI -* runs into invalid ACIP. +* An exception thrown whenever an EWTS->TMW or ACIP->TMW conversion in +* the Jskad GUI runs into an invalid transliteration string. * @author David Chandler */ -public class InvalidACIPException extends Exception { +public class InvalidTransliterationException extends Exception { private String error; /** -* Creates an InvalidACIPException. +* Creates an InvalidTransliterationException. * @param s an error message */ - public InvalidACIPException(String s) { + public InvalidTransliterationException(String s) { error = s; } diff --git a/source/org/thdl/tib/text/THDLWylieConstants.java b/source/org/thdl/tib/text/THDLWylieConstants.java index 15d700f..9c355a0 100644 --- a/source/org/thdl/tib/text/THDLWylieConstants.java +++ b/source/org/thdl/tib/text/THDLWylieConstants.java @@ -22,7 +22,15 @@ package org.thdl.tib.text; * @see TibetanMachineWeb */ public interface THDLWylieConstants { /** -* the Wylie for bindu/anusvara +* the Wylie for U+0F82 +*/ + public static final String U0F82 = "~M`"; +/** +* the Wylie for U+0F83 +*/ + public static final String U0F83 = "~M"; +/** +* the Wylie for bindu/anusvara (U+0F7E) */ public static final char BINDU = 'M'; /** @@ -52,6 +60,10 @@ public interface THDLWylieConstants { */ public static final String WYLIE_aVOWEL = "a"; /** +* the Wylie for U+0F39 +*/ + public static final String WYLIE_TSA_PHRU = "^"; +/** * the Wylie for achung */ public static final char ACHUNG_character = '\''; diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index 6c7f77e..b606c89 100644 --- 
a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -25,7 +25,9 @@ import javax.swing.text.rtf.RTFEditorKit; import java.io.*; import org.thdl.util.ThdlDebug; +import org.thdl.tib.text.ttt.TTraits; import org.thdl.tib.text.ttt.ACIPTraits; +import org.thdl.tib.text.ttt.EWTSTraits; import org.thdl.tib.text.ttt.TConverter; import org.thdl.tib.text.tshegbar.LegalTshegBar; import org.thdl.tib.text.tshegbar.UnicodeConstants; @@ -312,34 +314,44 @@ public class TibTextUtils implements THDLWylieConstants { = new boolean[] { false }; /** -* Converts a string of ACIP into TibetanMachineWeb and inserts that -* into tdoc at offset loc. -* @param acip the ACIP you want to convert +* Converts a string of transliteration into TibetanMachineWeb and +* inserts that into tdoc at offset loc. +* @param EWTSNotACIP true if you want THDL Extended Wylie, false if +* you want ACIP +* @param translit the transliteration you want to convert * @param tdoc the document in which to insert the TMW * @param loc the offset inside the document at which to insert the TMW * @param withWarnings true if and only if you want warnings to appear * in the output, such as "this could be a mistranscription of blah..." -* @throws InvalidACIPException if the ACIP is deemed invalid, i.e. if -* it does not conform to the ACIP transcription rules (those in the -* official document and the subtler rules pieced together by David -* Chandler through study and private correspondence with Robert -* Chilton) +* @throws InvalidTransliterationException if the transliteration is +* deemed invalid, i.e. 
if it does not conform to the transcription +* rules (those in the official document and the subtler rules pieced +* together by David Chandler through study and private correspondence +* with Robert Chilton (for ACIP), Than Garson, David Germano, Chris +* Fynn, and others) * @return the number of characters inserted into tdoc */ - public static int insertTibetanMachineWebForACIP(String acip, - TibetanDocument tdoc, - int loc, - boolean withWarnings) - throws InvalidACIPException + public static int insertTibetanMachineWebForTranslit(boolean EWTSNotACIP, + String translit, + TibetanDocument tdoc, + int loc, + boolean withWarnings) + throws InvalidTransliterationException { StringBuffer errors = new StringBuffer(); String warningLevel = withWarnings ? "All" : "None"; - ArrayList al = ACIPTraits.instance().scanner().scan(acip, errors, 500, - false, warningLevel); + + TTraits traits = (EWTSNotACIP + ? (TTraits)EWTSTraits.instance() + : (TTraits)ACIPTraits.instance()); + ArrayList al = traits.scanner().scan(translit, errors, 500, + false, warningLevel); if (null == al || errors.length() > 0) { if (errors.length() > 0) - throw new InvalidACIPException(errors.toString()); + throw new InvalidTransliterationException(errors.toString()); else - throw new InvalidACIPException("Fatal error converting ACIP to TMW."); + throw new InvalidTransliterationException("Fatal error converting " + + traits.shortTranslitName() + + " to TMW."); } boolean colors = withWarnings; boolean putWarningsInOutput = false; @@ -348,7 +360,7 @@ public class TibTextUtils implements THDLWylieConstants { } try { int tloc[] = new int[] { loc }; - TConverter.convertToTMW(ACIPTraits.instance(), al, tdoc, null, null, + TConverter.convertToTMW(traits, al, tdoc, null, null, null, putWarningsInOutput, warningLevel, false, colors, tloc); return tloc[0] - loc; @@ -364,8 +376,13 @@ public class TibTextUtils implements THDLWylieConstants { * corresponding to the Wylie text * @throws InvalidWylieException if the 
Wylie is deemed invalid, * i.e. if it does not conform to the Extended Wylie standard +* @deprecated by insertTibetanMachineWebForTranslit */ public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException { + ThdlDebug.noteIffyCode(); // deprecated method! + // TODO(dchandler): remove it and + // hopefully a ton of code that + // only it uses. List chars = new ArrayList(); DuffCode dc; int start = 0; diff --git a/source/org/thdl/tib/text/ttt/EWTSTest.java b/source/org/thdl/tib/text/ttt/EWTSTest.java index 7cfe654..2dc673d 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTest.java +++ b/source/org/thdl/tib/text/ttt/EWTSTest.java @@ -79,6 +79,11 @@ public class EWTSTest extends TestCase { /** Causes a JUnit test case failure unless the EWTS document ewts * converts to the unicode expectedUnicode. */ static void ewts2uni_test(String ewts, String expectedUnicode) { + // TODO(DLC)[EWTS->Tibetan]: In addition to what this + // currently does, have this function convert to TMW and + // convert that TMW to Unicode and verify that the result is + // the same. Almost every call should allow for that. + StringBuffer errors = new StringBuffer(); String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(), ewts, errors, diff --git a/source/org/thdl/tib/text/ttt/EWTSTraits.java b/source/org/thdl/tib/text/ttt/EWTSTraits.java index 0233c2a..53b0545 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTraits.java +++ b/source/org/thdl/tib/text/ttt/EWTSTraits.java @@ -164,6 +164,10 @@ public final class EWTSTraits implements TTraits { // TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test. + // TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work. + + // TODO(DLC)[EWTS->Tibetan]: kai doesn't work. + // Order matters here. 
boolean context_added[] = new boolean[] { false }; if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) { @@ -183,11 +187,7 @@ public final class EWTSTraits implements TTraits { } if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) { TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added); - } - if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) { - TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added); - } - if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) { + } else if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) { TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added); } else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) { TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added); @@ -198,7 +198,9 @@ public final class EWTSTraits implements TTraits { if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) { TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added); } - if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) { + if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added); + } else if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) { TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added); } if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah @@ -209,7 +211,12 @@ public final class EWTSTraits implements TTraits { } // FIXME: Use TMW9.61, the "o'i" special combination, when appropriate. - if (wowel.indexOf('M') >= 0) { + if (wowel.indexOf(THDLWylieConstants.BINDU) >= 0 + // TODO(DLC)[EWTS->Tibetan]: This is really ugly... we + // rely on the fact that we know every Wylie wowel that + // contains 'M'. Let's, instead, parse the wowel. 
+ && wowel.indexOf(THDLWylieConstants.U0F82) < 0 + && wowel.indexOf(THDLWylieConstants.U0F83) < 0) { DuffCode last = null; if (!context_added[0]) { last = preceding; @@ -219,10 +226,35 @@ public final class EWTSTraits implements TTraits { // TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone??? } TibTextUtils.getBindu(duff, last); + context_added[0] = true; + } + if (!context_added[0]) { + duff.add(preceding); } if (wowel.indexOf('H') >= 0) duff.add(TibetanMachineWeb.getGlyph("H")); - + int ix; + if ((ix = wowel.indexOf(THDLWylieConstants.WYLIE_TSA_PHRU)) >= 0) { + // This likely won't look good! TMW has glyphs for [va] + // and [fa], so use that transliteration if you care, not + // [ph^] or [b^]. + duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.WYLIE_TSA_PHRU)); + StringBuffer sb = new StringBuffer(wowel); + sb.replace(ix, ix + THDLWylieConstants.WYLIE_TSA_PHRU.length(), ""); + wowel = sb.toString(); + } + if ((ix = wowel.indexOf(THDLWylieConstants.U0F82)) >= 0) { + duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F82)); + StringBuffer sb = new StringBuffer(wowel); + sb.replace(ix, ix + THDLWylieConstants.U0F82.length(), ""); + wowel = sb.toString(); + } + if ((ix = wowel.indexOf(THDLWylieConstants.U0F83)) >= 0) { + duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F83)); + StringBuffer sb = new StringBuffer(wowel); + sb.replace(ix, ix + THDLWylieConstants.U0F83.length(), ""); + wowel = sb.toString(); + } // TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g.