EWTS->TMW fixes. Wowel handling still isn't perfect but I'm lazy.
Jskad now uses the new EWTS->TMW routine, not the old, and thus the "(Buggy)" label is [unfairly, perhaps] dropped.
This commit is contained in:
parent
0f99c402df
commit
982350371d
7 changed files with 129 additions and 86 deletions
|
@ -19,17 +19,17 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.tib.text;
|
||||
|
||||
/**
|
||||
* An exception thrown whenever ACIP->TMW conversion in the Jskad GUI
|
||||
* runs into invalid ACIP.
|
||||
* An exception thrown whenever an EWTS->TMW or ACIP->TMW conversion in
|
||||
* the Jskad GUI runs into an invalid transliteration string.
|
||||
* @author David Chandler */
|
||||
public class InvalidACIPException extends Exception {
|
||||
public class InvalidTransliterationException extends Exception {
|
||||
private String error;
|
||||
|
||||
/**
|
||||
* Creates an InvalidACIPException.
|
||||
* Creates an InvalidTransliterationException.
|
||||
* @param s an error message
|
||||
*/
|
||||
public InvalidACIPException(String s) {
|
||||
public InvalidTransliterationException(String s) {
|
||||
error = s;
|
||||
}
|
||||
|
|
@ -22,7 +22,15 @@ package org.thdl.tib.text;
|
|||
* @see TibetanMachineWeb */
|
||||
public interface THDLWylieConstants {
|
||||
/**
|
||||
* the Wylie for bindu/anusvara
|
||||
* the Wylie for U+0F82
|
||||
*/
|
||||
public static final String U0F82 = "~M`";
|
||||
/**
|
||||
* the Wylie for U+0F83
|
||||
*/
|
||||
public static final String U0F83 = "~M";
|
||||
/**
|
||||
* the Wylie for bindu/anusvara (U+0F7E)
|
||||
*/
|
||||
public static final char BINDU = 'M';
|
||||
/**
|
||||
|
@ -52,6 +60,10 @@ public interface THDLWylieConstants {
|
|||
*/
|
||||
public static final String WYLIE_aVOWEL = "a";
|
||||
/**
|
||||
* the Wylie for U+0F39
|
||||
*/
|
||||
public static final String WYLIE_TSA_PHRU = "^";
|
||||
/**
|
||||
* the Wylie for achung
|
||||
*/
|
||||
public static final char ACHUNG_character = '\'';
|
||||
|
|
|
@ -25,7 +25,9 @@ import javax.swing.text.rtf.RTFEditorKit;
|
|||
import java.io.*;
|
||||
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.tib.text.ttt.TTraits;
|
||||
import org.thdl.tib.text.ttt.ACIPTraits;
|
||||
import org.thdl.tib.text.ttt.EWTSTraits;
|
||||
import org.thdl.tib.text.ttt.TConverter;
|
||||
import org.thdl.tib.text.tshegbar.LegalTshegBar;
|
||||
import org.thdl.tib.text.tshegbar.UnicodeConstants;
|
||||
|
@ -312,34 +314,44 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
= new boolean[] { false };
|
||||
|
||||
/**
|
||||
* Converts a string of ACIP into TibetanMachineWeb and inserts that
|
||||
* into tdoc at offset loc.
|
||||
* @param acip the ACIP you want to convert
|
||||
* Converts a string of transliteration into TibetanMachineWeb and
|
||||
* inserts that into tdoc at offset loc.
|
||||
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
|
||||
* you want ACIP
|
||||
* @param translit the transliteration you want to convert
|
||||
* @param tdoc the document in which to insert the TMW
|
||||
* @param loc the offset inside the document at which to insert the TMW
|
||||
* @param withWarnings true if and only if you want warnings to appear
|
||||
* in the output, such as "this could be a mistranscription of blah..."
|
||||
* @throws InvalidACIPException if the ACIP is deemed invalid, i.e. if
|
||||
* it does not conform to the ACIP transcription rules (those in the
|
||||
* official document and the subtler rules pieced together by David
|
||||
* Chandler through study and private correspondence with Robert
|
||||
* Chilton)
|
||||
* @throws InvalidTransliterationException if the transliteration is
|
||||
* deemed invalid, i.e. if it does not conform to the transcription
|
||||
* rules (those in the official document and the subtler rules pieced
|
||||
* together by David Chandler through study and private correspondence
|
||||
* with Robert Chilton (for ACIP), Than Garson, David Germano, Chris
|
||||
* Fynn, and others)
|
||||
* @return the number of characters inserted into tdoc */
|
||||
public static int insertTibetanMachineWebForACIP(String acip,
|
||||
TibetanDocument tdoc,
|
||||
int loc,
|
||||
boolean withWarnings)
|
||||
throws InvalidACIPException
|
||||
public static int insertTibetanMachineWebForTranslit(boolean EWTSNotACIP,
|
||||
String translit,
|
||||
TibetanDocument tdoc,
|
||||
int loc,
|
||||
boolean withWarnings)
|
||||
throws InvalidTransliterationException
|
||||
{
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String warningLevel = withWarnings ? "All" : "None";
|
||||
ArrayList al = ACIPTraits.instance().scanner().scan(acip, errors, 500,
|
||||
false, warningLevel);
|
||||
|
||||
TTraits traits = (EWTSNotACIP
|
||||
? (TTraits)EWTSTraits.instance()
|
||||
: (TTraits)ACIPTraits.instance());
|
||||
ArrayList al = traits.scanner().scan(translit, errors, 500,
|
||||
false, warningLevel);
|
||||
if (null == al || errors.length() > 0) {
|
||||
if (errors.length() > 0)
|
||||
throw new InvalidACIPException(errors.toString());
|
||||
throw new InvalidTransliterationException(errors.toString());
|
||||
else
|
||||
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
|
||||
throw new InvalidTransliterationException("Fatal error converting "
|
||||
+ traits.shortTranslitName()
|
||||
+ " to TMW.");
|
||||
}
|
||||
boolean colors = withWarnings;
|
||||
boolean putWarningsInOutput = false;
|
||||
|
@ -348,7 +360,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
}
|
||||
try {
|
||||
int tloc[] = new int[] { loc };
|
||||
TConverter.convertToTMW(ACIPTraits.instance(), al, tdoc, null, null,
|
||||
TConverter.convertToTMW(traits, al, tdoc, null, null,
|
||||
null, putWarningsInOutput, warningLevel,
|
||||
false, colors, tloc);
|
||||
return tloc[0] - loc;
|
||||
|
@ -364,8 +376,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
* corresponding to the Wylie text
|
||||
* @throws InvalidWylieException if the Wylie is deemed invalid,
|
||||
* i.e. if it does not conform to the Extended Wylie standard
|
||||
* @deprecated use insertTibetanMachineWebForTranslit instead
|
||||
*/
|
||||
public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException {
|
||||
ThdlDebug.noteIffyCode(); // deprecated method!
|
||||
// TODO(dchandler): remove it and
|
||||
// hopefully a ton of code that
|
||||
// only it uses.
|
||||
List chars = new ArrayList();
|
||||
DuffCode dc;
|
||||
int start = 0;
|
||||
|
|
|
@ -79,6 +79,11 @@ public class EWTSTest extends TestCase {
|
|||
/** Causes a JUnit test case failure unless the EWTS document ewts
|
||||
* converts to the unicode expectedUnicode. */
|
||||
static void ewts2uni_test(String ewts, String expectedUnicode) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: In addition to what this
|
||||
// currently does, have this function convert to TMW and
|
||||
// convert that TMW to Unicode and verify that the result is
|
||||
// the same. Almost every call should allow for that.
|
||||
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
|
||||
ewts, errors,
|
||||
|
|
|
@ -164,6 +164,10 @@ public final class EWTSTraits implements TTraits {
|
|||
|
||||
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: kai doesn't work.
|
||||
|
||||
// Order matters here.
|
||||
boolean context_added[] = new boolean[] { false };
|
||||
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) {
|
||||
|
@ -183,11 +187,7 @@ public final class EWTSTraits implements TTraits {
|
|||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
|
||||
} else if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
|
||||
|
@ -198,7 +198,9 @@ public final class EWTSTraits implements TTraits {
|
|||
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
|
||||
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
|
||||
|
@ -209,7 +211,12 @@ public final class EWTSTraits implements TTraits {
|
|||
}
|
||||
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
|
||||
|
||||
if (wowel.indexOf('M') >= 0) {
|
||||
if (wowel.indexOf(THDLWylieConstants.BINDU) >= 0
|
||||
// TODO(DLC)[EWTS->Tibetan]: This is really ugly... we
|
||||
// rely on the fact that we know every Wylie wowel that
|
||||
// contains 'M'. Let's, instead, parse the wowel.
|
||||
&& wowel.indexOf(THDLWylieConstants.U0F82) < 0
|
||||
&& wowel.indexOf(THDLWylieConstants.U0F83) < 0) {
|
||||
DuffCode last = null;
|
||||
if (!context_added[0]) {
|
||||
last = preceding;
|
||||
|
@ -219,10 +226,35 @@ public final class EWTSTraits implements TTraits {
|
|||
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
|
||||
}
|
||||
TibTextUtils.getBindu(duff, last);
|
||||
context_added[0] = true;
|
||||
}
|
||||
if (!context_added[0]) {
|
||||
duff.add(preceding);
|
||||
}
|
||||
if (wowel.indexOf('H') >= 0)
|
||||
duff.add(TibetanMachineWeb.getGlyph("H"));
|
||||
|
||||
int ix;
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.WYLIE_TSA_PHRU)) >= 0) {
|
||||
// This likely won't look good! TMW has glyphs for [va]
|
||||
// and [fa], so use that transliteration if you care, not
|
||||
// [ph^] or [b^].
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.WYLIE_TSA_PHRU));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.WYLIE_TSA_PHRU.length(), "");
|
||||
wowel = sb.toString();
|
||||
}
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.U0F82)) >= 0) {
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F82));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.U0F82.length(), "");
|
||||
wowel = sb.toString();
|
||||
}
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.U0F83)) >= 0) {
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F83));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.U0F83.length(), "");
|
||||
wowel = sb.toString();
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! ACIP does that. E.g., I think we screw up 'jam~X.
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue