Added UI for EWTS->Tibetan conversions. The GUI is disabled except in
debug mode for now. I tested against a really simple-but-real document, found a bug with '*', and tried to implement the TMW vowel code, but I don't trust it yet. Differentiated EWTS code from ACIP where needed. Several bugs in EWTS->Tibetan have been exposed; see the TODO comments.
This commit is contained in:
parent
7198f23361
commit
2678fc134a
9 changed files with 150 additions and 34 deletions
|
@ -223,10 +223,13 @@ class ConvertDialog extends JDialog
|
||||||
JButton src = (JButton)ae.getSource();
|
JButton src = (JButton)ae.getSource();
|
||||||
if (src == browseOld) {
|
if (src == browseOld) {
|
||||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||||
|| ACIP_TO_TMW.equals((String)choices.getSelectedItem()))
|
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||||
|
|| ACIP_TO_TMW.equals((String)choices.getSelectedItem())
|
||||||
|
|| WYLIE_TO_TMW.equals((String)choices.getSelectedItem()))
|
||||||
? acipff : rtfff);
|
? acipff : rtfff);
|
||||||
} else {
|
} else {
|
||||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||||
|
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||||
|| TMW_TO_ACIP_TEXT.equals((String)choices.getSelectedItem())
|
|| TMW_TO_ACIP_TEXT.equals((String)choices.getSelectedItem())
|
||||||
|| TMW_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem()))
|
|| TMW_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem()))
|
||||||
? acipff : rtfff);
|
? acipff : rtfff);
|
||||||
|
@ -457,7 +460,7 @@ class ConvertDialog extends JDialog
|
||||||
} else if (FIND_ALL_NON_TM == ct) {
|
} else if (FIND_ALL_NON_TM == ct) {
|
||||||
newFileNamePrefix = "AllNonTM__";
|
newFileNamePrefix = "AllNonTM__";
|
||||||
newFileNameExtension = ".TXT";
|
newFileNameExtension = ".TXT";
|
||||||
} else if (TMW_TO_SAME_TWM == ct) {
|
} else if (TMW_TO_SAME_TMW == ct) {
|
||||||
newFileNamePrefix = "TMW_to_same_TMW__";
|
newFileNamePrefix = "TMW_to_same_TMW__";
|
||||||
newFileNameExtension = ".RTF";
|
newFileNameExtension = ".RTF";
|
||||||
} else { // conversion mode
|
} else { // conversion mode
|
||||||
|
@ -471,13 +474,15 @@ class ConvertDialog extends JDialog
|
||||||
} else if (TMW_TO_ACIP_TEXT == ct) {
|
} else if (TMW_TO_ACIP_TEXT == ct) {
|
||||||
newFileNamePrefix = suggested_ACIP_prefix;
|
newFileNamePrefix = suggested_ACIP_prefix;
|
||||||
newFileNameExtension = ".TXT";
|
newFileNameExtension = ".TXT";
|
||||||
} else if (TMW_TO_UNI == ct || ACIP_TO_UNI_TEXT == ct) {
|
} else if (TMW_TO_UNI == ct || ACIP_TO_UNI_TEXT == ct
|
||||||
|
|| WYLIE_TO_UNI_TEXT == ct) {
|
||||||
newFileNamePrefix = suggested_TO_UNI_prefix;
|
newFileNamePrefix = suggested_TO_UNI_prefix;
|
||||||
if (ACIP_TO_UNI_TEXT == ct)
|
if (ACIP_TO_UNI_TEXT == ct || WYLIE_TO_UNI_TEXT == ct)
|
||||||
newFileNameExtension = ".TXT";
|
newFileNameExtension = ".TXT";
|
||||||
} else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) {
|
} else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct
|
||||||
|
|| WYLIE_TO_TMW == ct) {
|
||||||
newFileNamePrefix = suggested_TO_TMW_prefix;
|
newFileNamePrefix = suggested_TO_TMW_prefix;
|
||||||
if (ACIP_TO_TMW == ct)
|
if (ACIP_TO_TMW == ct || WYLIE_TO_TMW == ct)
|
||||||
newFileNameExtension = ".RTF";
|
newFileNameExtension = ".RTF";
|
||||||
} else {
|
} else {
|
||||||
ThdlDebug.verify(TMW_TO_TM == ct);
|
ThdlDebug.verify(TMW_TO_TM == ct);
|
||||||
|
@ -509,6 +514,7 @@ class ConvertDialog extends JDialog
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]: we use for wylie (ewts) too...
|
||||||
public class ACIPFileFilter extends javax.swing.filechooser.FileFilter
|
public class ACIPFileFilter extends javax.swing.filechooser.FileFilter
|
||||||
{
|
{
|
||||||
public boolean accept(File f)
|
public boolean accept(File f)
|
||||||
|
|
|
@ -26,7 +26,9 @@ import java.awt.*;
|
||||||
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
|
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
|
||||||
interface FontConverterConstants
|
interface FontConverterConstants
|
||||||
{
|
{
|
||||||
final String TMW_TO_SAME_TWM = "TMW to the same TMW (for testing only) (RTF->RTF)";
|
final String WYLIE_TO_UNI_TEXT = "Wylie to Unicode (Text->Text)";
|
||||||
|
final String WYLIE_TO_TMW = "Wylie to TMW (Text->RTF)";
|
||||||
|
final String TMW_TO_SAME_TMW = "TMW to the same TMW (for testing only) (RTF->RTF)";
|
||||||
final String ACIP_TO_UNI_TEXT = "ACIP to Unicode (Text->Text)";
|
final String ACIP_TO_UNI_TEXT = "ACIP to Unicode (Text->Text)";
|
||||||
final String ACIP_TO_TMW = "ACIP to TMW (Text->RTF)";
|
final String ACIP_TO_TMW = "ACIP to TMW (Text->RTF)";
|
||||||
final String TMW_TO_ACIP = "TMW to ACIP (RTF->RTF)";
|
final String TMW_TO_ACIP = "TMW to ACIP (RTF->RTF)";
|
||||||
|
@ -42,6 +44,10 @@ interface FontConverterConstants
|
||||||
final String FIND_ALL_NON_TM = "Find all non-TM (in RTF)";
|
final String FIND_ALL_NON_TM = "Find all non-TM (in RTF)";
|
||||||
|
|
||||||
final String[] CHOICES = new String[] {
|
final String[] CHOICES = new String[] {
|
||||||
|
/* TODO(DLC)[EWTS->Tibetan]: once we're done debugging:
|
||||||
|
WYLIE_TO_UNI_TEXT,
|
||||||
|
WYLIE_TO_TMW,
|
||||||
|
*/
|
||||||
ACIP_TO_UNI_TEXT,
|
ACIP_TO_UNI_TEXT,
|
||||||
ACIP_TO_TMW,
|
ACIP_TO_TMW,
|
||||||
TMW_TO_ACIP,
|
TMW_TO_ACIP,
|
||||||
|
@ -58,7 +64,9 @@ interface FontConverterConstants
|
||||||
};
|
};
|
||||||
|
|
||||||
final String[] DEBUG_CHOICES = new String[] {
|
final String[] DEBUG_CHOICES = new String[] {
|
||||||
TMW_TO_SAME_TWM,
|
TMW_TO_SAME_TMW,
|
||||||
|
WYLIE_TO_UNI_TEXT,
|
||||||
|
WYLIE_TO_TMW,
|
||||||
ACIP_TO_UNI_TEXT,
|
ACIP_TO_UNI_TEXT,
|
||||||
ACIP_TO_TMW,
|
ACIP_TO_TMW,
|
||||||
TMW_TO_ACIP,
|
TMW_TO_ACIP,
|
||||||
|
|
|
@ -28,6 +28,8 @@ import org.thdl.tib.text.*;
|
||||||
|
|
||||||
import org.thdl.tib.text.ttt.TConverter;
|
import org.thdl.tib.text.ttt.TConverter;
|
||||||
import org.thdl.tib.text.ttt.ACIPTraits;
|
import org.thdl.tib.text.ttt.ACIPTraits;
|
||||||
|
import org.thdl.tib.text.ttt.EWTSTraits;
|
||||||
|
import org.thdl.tib.text.ttt.TTraits;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
/** TibetanConverter is a command-line utility for converting to and
|
/** TibetanConverter is a command-line utility for converting to and
|
||||||
|
@ -71,6 +73,8 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
boolean convertToTMMode = false;
|
boolean convertToTMMode = false;
|
||||||
boolean convertACIPToUniMode = false;
|
boolean convertACIPToUniMode = false;
|
||||||
boolean convertACIPToTMWMode = false;
|
boolean convertACIPToTMWMode = false;
|
||||||
|
boolean convertWylieToUniMode = false;
|
||||||
|
boolean convertWylieToTMWMode = false;
|
||||||
boolean convertToTMWMode = false;
|
boolean convertToTMWMode = false;
|
||||||
boolean convertToWylieRTFMode = false;
|
boolean convertToWylieRTFMode = false;
|
||||||
boolean convertToWylieTextMode = false;
|
boolean convertToWylieTextMode = false;
|
||||||
|
@ -116,6 +120,10 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
= args[numArgs - 2].equals("--acip-to-unicode"))
|
= args[numArgs - 2].equals("--acip-to-unicode"))
|
||||||
|| (convertACIPToTMWMode
|
|| (convertACIPToTMWMode
|
||||||
= args[numArgs - 2].equals("--acip-to-tmw"))
|
= args[numArgs - 2].equals("--acip-to-tmw"))
|
||||||
|
|| (convertWylieToUniMode
|
||||||
|
= args[numArgs - 2].equals("--wylie-to-unicode"))
|
||||||
|
|| (convertWylieToTMWMode
|
||||||
|
= args[numArgs - 2].equals("--wylie-to-tmw"))
|
||||||
|| (convertToUnicodeMode
|
|| (convertToUnicodeMode
|
||||||
= args[numArgs - 2].equals("--to-unicode"))
|
= args[numArgs - 2].equals("--to-unicode"))
|
||||||
|| (convertToWylieRTFMode
|
|| (convertToWylieRTFMode
|
||||||
|
@ -147,6 +155,7 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
|
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
|
||||||
out.println(" | --to-unicode | --to-wylie | --to-acip");
|
out.println(" | --to-unicode | --to-wylie | --to-acip");
|
||||||
out.println(" | --to-wylie-text | --to-acip-text");
|
out.println(" | --to-wylie-text | --to-acip-text");
|
||||||
|
out.println(" | --wylie-to-unicode | --wylie-to-tmw");
|
||||||
out.println(" | --acip-to-unicode | --acip-to-tmw RTF_file|TXT_file");
|
out.println(" | --acip-to-unicode | --acip-to-tmw RTF_file|TXT_file");
|
||||||
out.println(" | TibetanConverter [--version | -v | --help | -h]");
|
out.println(" | TibetanConverter [--version | -v | --help | -h]");
|
||||||
out.println("");
|
out.println("");
|
||||||
|
@ -251,13 +260,17 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
} else if (convertToUnicodeMode) {
|
} else if (convertToUnicodeMode) {
|
||||||
conversionTag = TMW_TO_UNI;
|
conversionTag = TMW_TO_UNI;
|
||||||
} else if (convertTmwToTmwMode) {
|
} else if (convertTmwToTmwMode) {
|
||||||
conversionTag = TMW_TO_SAME_TWM;
|
conversionTag = TMW_TO_SAME_TMW;
|
||||||
} else if (convertToTMWMode) {
|
} else if (convertToTMWMode) {
|
||||||
conversionTag = TM_TO_TMW;
|
conversionTag = TM_TO_TMW;
|
||||||
} else if (convertACIPToUniMode) {
|
} else if (convertACIPToUniMode) {
|
||||||
conversionTag = ACIP_TO_UNI_TEXT;
|
conversionTag = ACIP_TO_UNI_TEXT;
|
||||||
} else if (convertACIPToTMWMode) {
|
} else if (convertACIPToTMWMode) {
|
||||||
conversionTag = ACIP_TO_TMW;
|
conversionTag = ACIP_TO_TMW;
|
||||||
|
} else if (convertWylieToUniMode) {
|
||||||
|
conversionTag = WYLIE_TO_UNI_TEXT;
|
||||||
|
} else if (convertWylieToTMWMode) {
|
||||||
|
conversionTag = WYLIE_TO_TMW;
|
||||||
} else {
|
} else {
|
||||||
ThdlDebug.verify(convertToTMMode);
|
ThdlDebug.verify(convertToTMMode);
|
||||||
conversionTag = TMW_TO_TM;
|
conversionTag = TMW_TO_TM;
|
||||||
|
@ -294,11 +307,16 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
static int reallyConvert(InputStream in, PrintStream out, String ct,
|
static int reallyConvert(InputStream in, PrintStream out, String ct,
|
||||||
String warningLevel, boolean shortMessages,
|
String warningLevel, boolean shortMessages,
|
||||||
boolean colors) {
|
boolean colors) {
|
||||||
if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct) {
|
if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
|
||||||
|
|| WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
|
||||||
try {
|
try {
|
||||||
ArrayList al
|
ArrayList al
|
||||||
= ACIPTraits.instance().scanner().scanStream(in, null,
|
= ((ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct)
|
||||||
ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have",
|
? (TTraits)ACIPTraits.instance()
|
||||||
|
: (TTraits)EWTSTraits.instance()).scanner().scanStream(in, null,
|
||||||
|
ThdlOptions.getIntegerOption((ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct)
|
||||||
|
? "thdl.most.errors.a.tibetan.acip.document.can.have"
|
||||||
|
: "thdl.most.errors.a.tibetan.ewts.document.can.have",
|
||||||
1000 - 1),
|
1000 - 1),
|
||||||
shortMessages,
|
shortMessages,
|
||||||
warningLevel);
|
warningLevel);
|
||||||
|
@ -306,8 +324,11 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
return 47;
|
return 47;
|
||||||
boolean embeddedWarnings = (warningLevel != "None");
|
boolean embeddedWarnings = (warningLevel != "None");
|
||||||
boolean hasWarnings[] = new boolean[] { false };
|
boolean hasWarnings[] = new boolean[] { false };
|
||||||
if (ACIP_TO_UNI_TEXT == ct) {
|
if (ACIP_TO_UNI_TEXT == ct
|
||||||
if (!TConverter.convertToUnicodeText(ACIPTraits.instance(),
|
|| WYLIE_TO_UNI_TEXT == ct) {
|
||||||
|
if (!TConverter.convertToUnicodeText((WYLIE_TO_UNI_TEXT == ct)
|
||||||
|
? (TTraits)EWTSTraits.instance()
|
||||||
|
: (TTraits)ACIPTraits.instance(),
|
||||||
al, out, null,
|
al, out, null,
|
||||||
null, hasWarnings,
|
null, hasWarnings,
|
||||||
embeddedWarnings,
|
embeddedWarnings,
|
||||||
|
@ -315,8 +336,9 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
shortMessages))
|
shortMessages))
|
||||||
return 46;
|
return 46;
|
||||||
} else {
|
} else {
|
||||||
if (ct != ACIP_TO_TMW) throw new Error("badness");
|
if (!TConverter.convertToTMW((WYLIE_TO_TMW == ct)
|
||||||
if (!TConverter.convertToTMW(ACIPTraits.instance(),
|
? (TTraits)EWTSTraits.instance()
|
||||||
|
: (TTraits)ACIPTraits.instance(),
|
||||||
al, out, null, null,
|
al, out, null, null,
|
||||||
hasWarnings,
|
hasWarnings,
|
||||||
embeddedWarnings,
|
embeddedWarnings,
|
||||||
|
@ -402,7 +424,7 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
|
|
||||||
int exitCode = 0;
|
int exitCode = 0;
|
||||||
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
|
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
|
||||||
+ ((TMW_TO_SAME_TWM == ct) ? 1 : 0)
|
+ ((TMW_TO_SAME_TMW == ct) ? 1 : 0)
|
||||||
+ ((TMW_TO_UNI == ct) ? 1 : 0)
|
+ ((TMW_TO_UNI == ct) ? 1 : 0)
|
||||||
+ ((TM_TO_TMW == ct) ? 1 : 0)
|
+ ((TM_TO_TMW == ct) ? 1 : 0)
|
||||||
+ ((TMW_TO_ACIP == ct) ? 1 : 0)
|
+ ((TMW_TO_ACIP == ct) ? 1 : 0)
|
||||||
|
@ -411,7 +433,7 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
+ ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0)
|
+ ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0)
|
||||||
== 1);
|
== 1);
|
||||||
long numAttemptedReplacements[] = new long[] { 0 };
|
long numAttemptedReplacements[] = new long[] { 0 };
|
||||||
if (TMW_TO_SAME_TWM == ct) {
|
if (TMW_TO_SAME_TMW == ct) {
|
||||||
// Identity conversion for testing
|
// Identity conversion for testing
|
||||||
if (tdoc.identityTmwToTmwConversion(0,
|
if (tdoc.identityTmwToTmwConversion(0,
|
||||||
tdoc.getLength(),
|
tdoc.getLength(),
|
||||||
|
|
|
@ -560,6 +560,7 @@ public final class ACIPTraits implements TTraits {
|
||||||
} else if (wowel.indexOf("'I") >= 0) {
|
} else if (wowel.indexOf("'I") >= 0) {
|
||||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
|
||||||
} else {
|
} else {
|
||||||
|
// TODO(dchandler): I don't understand why we go from else ifs to this form...
|
||||||
if (wowel.indexOf('\'') >= 0) {
|
if (wowel.indexOf('\'') >= 0) {
|
||||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,8 @@ package org.thdl.tib.text.ttt;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
import org.thdl.tib.text.DuffCode;
|
import org.thdl.tib.text.DuffCode;
|
||||||
|
import org.thdl.tib.text.THDLWylieConstants;
|
||||||
|
import org.thdl.tib.text.TibTextUtils;
|
||||||
import org.thdl.tib.text.TibetanMachineWeb;
|
import org.thdl.tib.text.TibetanMachineWeb;
|
||||||
import org.thdl.util.ThdlDebug;
|
import org.thdl.util.ThdlDebug;
|
||||||
|
|
||||||
|
@ -154,7 +156,70 @@ public final class EWTSTraits implements TTraits {
|
||||||
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
|
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
|
||||||
|
|
||||||
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
|
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
|
||||||
throw new Error("TODO(DLC)[EWTS->Tibetan]");
|
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
|
||||||
|
|
||||||
|
// Order matters here.
|
||||||
|
boolean context_added[] = new boolean[] { false };
|
||||||
|
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
|
||||||
|
} else {
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]: test vowel stacking
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.U_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.reverse_I_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_I_VOWEL, context_added);
|
||||||
|
} else if (wowel.indexOf(THDLWylieConstants.I_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.A_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
|
||||||
|
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.e_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
|
||||||
|
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
|
||||||
|
duff.add(TibetanMachineWeb.getGlyph("~X"));
|
||||||
|
} else if (wowel.indexOf("X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
|
||||||
|
duff.add(TibetanMachineWeb.getGlyph("X"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
|
||||||
|
|
||||||
|
if (wowel.indexOf('M') >= 0) {
|
||||||
|
DuffCode last = null;
|
||||||
|
if (duff.size() > 0) {
|
||||||
|
last = (DuffCode)duff.get(duff.size() - 1);
|
||||||
|
duff.remove(duff.size() - 1); // getBindu will add it back...
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
|
||||||
|
}
|
||||||
|
TibTextUtils.getBindu(duff, last);
|
||||||
|
}
|
||||||
|
if (wowel.indexOf('H') >= 0)
|
||||||
|
duff.add(TibetanMachineWeb.getGlyph("H"));
|
||||||
|
|
||||||
|
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g.
|
||||||
|
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]:: are bindus are screwed up in the unicode output? i see (with tmuni font) lone bindus without glyphs to stack on
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getUnicodeForWowel(String wowel) {
|
public String getUnicodeForWowel(String wowel) {
|
||||||
|
@ -223,7 +288,12 @@ public final class EWTSTraits implements TTraits {
|
||||||
{
|
{
|
||||||
boolean already_done = true;
|
boolean already_done = true;
|
||||||
for (int i = 0; i < l.length(); i++) {
|
for (int i = 0; i < l.length(); i++) {
|
||||||
if (!(l.charAt(0) >= '\u0f00' && l.charAt(0) <= '\u0fff')) {
|
char ch = l.charAt(i);
|
||||||
|
if ((ch < '\u0f00' || ch > '\u0fff')
|
||||||
|
&& '\n' != ch
|
||||||
|
&& '\r' != ch) {
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]: Is this the place
|
||||||
|
// where we want to interpret how newlines work???
|
||||||
already_done = false;
|
already_done = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,6 +61,9 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
|
||||||
StringBuffer sb = new StringBuffer(s);
|
StringBuffer sb = new StringBuffer(s);
|
||||||
ExpandEscapeSequences(sb);
|
ExpandEscapeSequences(sb);
|
||||||
int sl = sb.length();
|
int sl = sb.length();
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]:: 'jamX 'jam~X one is not working in ->tmw mode
|
||||||
|
// TODO(DLC)[EWTS->Tibetan]:: dzaHsogs is not working
|
||||||
for (int i = 0; i < sl; i++) {
|
for (int i = 0; i < sl; i++) {
|
||||||
if (isValidInsideTshegBar(sb.charAt(i))) {
|
if (isValidInsideTshegBar(sb.charAt(i))) {
|
||||||
StringBuffer tbsb = new StringBuffer();
|
StringBuffer tbsb = new StringBuffer();
|
||||||
|
@ -75,7 +78,7 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
|
||||||
al.add(new TString("EWTS", tbsb.toString(),
|
al.add(new TString("EWTS", tbsb.toString(),
|
||||||
TString.TIBETAN_NON_PUNCTUATION));
|
TString.TIBETAN_NON_PUNCTUATION));
|
||||||
} else {
|
} else {
|
||||||
if (" /;|!:=_@#$%<>()\r\n\t".indexOf(sb.charAt(i)) >= 0)
|
if (" /;|!:=_@#$%<>()\r\n\t*".indexOf(sb.charAt(i)) >= 0)
|
||||||
al.add(new TString("EWTS", sb.substring(i, i+1),
|
al.add(new TString("EWTS", sb.substring(i, i+1),
|
||||||
TString.TIBETAN_PUNCTUATION));
|
TString.TIBETAN_PUNCTUATION));
|
||||||
else
|
else
|
||||||
|
|
|
@ -327,13 +327,15 @@ class TParseTree {
|
||||||
translit,
|
translit,
|
||||||
traits);
|
traits);
|
||||||
} else {
|
} else {
|
||||||
if (bestParse.hasStackWithoutVowel(pl, isLastStack)) {
|
if (bestParse.hasStackWithoutVowel(traits.isACIP(),
|
||||||
|
pl, isLastStack)) {
|
||||||
if (isLastStack[0]) {
|
if (isLastStack[0]) {
|
||||||
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
|
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
|
||||||
return ErrorsAndWarnings.getMessage(502, shortMessages,
|
return ErrorsAndWarnings.getMessage(502, shortMessages,
|
||||||
translit,
|
translit,
|
||||||
traits);
|
traits);
|
||||||
} else {
|
} else {
|
||||||
|
if (traits.isACIP())
|
||||||
throw new Error("Can't happen now that we stack greedily");
|
throw new Error("Can't happen now that we stack greedily");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -343,13 +345,15 @@ class TParseTree {
|
||||||
traits);
|
traits);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) {
|
if (nip.get(0).hasStackWithoutVowel(traits.isACIP(),
|
||||||
|
pl, isLastStack)) {
|
||||||
if (isLastStack[0]) {
|
if (isLastStack[0]) {
|
||||||
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
|
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
|
||||||
return ErrorsAndWarnings.getMessage(502, shortMessages,
|
return ErrorsAndWarnings.getMessage(502, shortMessages,
|
||||||
translit,
|
translit,
|
||||||
traits);
|
traits);
|
||||||
} else {
|
} else {
|
||||||
|
if (traits.isACIP())
|
||||||
throw new Error("Can't happen now that we stack greedily [2]");
|
throw new Error("Can't happen now that we stack greedily [2]");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -193,14 +193,15 @@ class TStackList {
|
||||||
|
|
||||||
/** Returns true if and only if this stack list contains a stack
|
/** Returns true if and only if this stack list contains a stack
|
||||||
* that does not end in a vowel or disambiguator. Note that this
|
* that does not end in a vowel or disambiguator. Note that this
|
||||||
* is not erroneous for legal Tibetan like {BRTAN}, where {B} has
|
* is not erroneous for legal Tibetan like ACIP {BRTAN}, where {B} has
|
||||||
* no vowel, but it is a warning sign for Sanskrit stacks.
|
* no vowel, but it is a warning sign for Sanskrit stacks.
|
||||||
|
* @param isACIP true iff opl is ACIP (not EWTS)
|
||||||
* @param opl the pair list from which this stack list
|
* @param opl the pair list from which this stack list
|
||||||
* originated
|
* originated
|
||||||
* @param isLastStack if non-null, then isLastStack[0] will be
|
* @param isLastStack if non-null, then isLastStack[0] will be
|
||||||
* set to true if and only if the very last stack is the only
|
* set to true if and only if the very last stack is the only
|
||||||
* stack not to have a vowel or disambiguator on it */
|
* stack not to have a vowel or disambiguator on it */
|
||||||
boolean hasStackWithoutVowel(TPairList opl, boolean[] isLastStack) {
|
boolean hasStackWithoutVowel(boolean isACIP, TPairList opl, boolean[] isLastStack) {
|
||||||
int runningSize = 0;
|
int runningSize = 0;
|
||||||
// FIXME: MARDA is MARD==MAR-D to us, but is probably MAR+DA, warn -- see 838470
|
// FIXME: MARDA is MARD==MAR-D to us, but is probably MAR+DA, warn -- see 838470
|
||||||
for (int i = 0; i < size(); i++) {
|
for (int i = 0; i < size(); i++) {
|
||||||
|
@ -213,15 +214,16 @@ class TStackList {
|
||||||
&& l.charAt(0) >= '0' && l.charAt(0) <= '9')) {
|
&& l.charAt(0) >= '0' && l.charAt(0) <= '9')) {
|
||||||
if (null != isLastStack) {
|
if (null != isLastStack) {
|
||||||
isLastStack[0] = (i + 1 == size());
|
isLastStack[0] = (i + 1 == size());
|
||||||
if (!isLastStack[0]) {
|
if (!isLastStack[0] && isACIP) {
|
||||||
throw new Error("But we now stack greedily!");
|
throw new Error("But we now stack greedily!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (runningSize != opl.sizeMinusDisambiguators())
|
if (runningSize != opl.sizeMinusDisambiguators()) {
|
||||||
throw new IllegalArgumentException("runningSize = " + runningSize + "; opl.sizeMinusDisambiguators = " + opl.sizeMinusDisambiguators() + "; opl (" + opl + ") is bad for this stack list (" + toString() + ")");
|
throw new IllegalArgumentException("runningSize = " + runningSize + "; opl.sizeMinusDisambiguators = " + opl.sizeMinusDisambiguators() + "; opl (" + opl + ") is bad for this stack list (" + toString() + ")");
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ import org.thdl.tib.text.DuffCode;
|
||||||
*
|
*
|
||||||
* <p>It is very likely that classes that implement this interface
|
* <p>It is very likely that classes that implement this interface
|
||||||
* will choose to use the design pattern 'singleton'. */
|
* will choose to use the design pattern 'singleton'. */
|
||||||
interface TTraits {
|
public interface TTraits {
|
||||||
/** Returns the disambiguator for this transliteration scheme,
|
/** Returns the disambiguator for this transliteration scheme,
|
||||||
* which had better be a string containing just one character
|
* which had better be a string containing just one character
|
||||||
* lest {@link #disambiguatorChar()} become nonsensical for
|
* lest {@link #disambiguatorChar()} become nonsensical for
|
||||||
|
|
Loading…
Reference in a new issue