The scaffolding for a Unicode->EWTS reverter. No guts yet.

This commit is contained in:
dchandler 2005-07-17 03:32:57 +00:00
parent ebc11a3425
commit 00afd75362
9 changed files with 282 additions and 16 deletions

View file

@ -97,14 +97,14 @@ class ConvertDialog extends JDialog
|| choices.getSelectedItem() == WYLIE_TO_UNI_TEXT);
}
private javax.swing.filechooser.FileFilter acipff, rtfff;
private javax.swing.filechooser.FileFilter textFileFilter, rtfFileFilter;
private void init()
{
jfc = new JFileChooser(controller.getDefaultDirectory());
jfc.setDialogTitle(LOCATE_FILE);
jfc.addChoosableFileFilter(acipff = new ACIPFileFilter());
jfc.addChoosableFileFilter(rtfff = new RTFFileFilter());
jfc.addChoosableFileFilter(textFileFilter = new TextFileFilter());
jfc.addChoosableFileFilter(rtfFileFilter = new RTFFileFilter());
content = new JPanel(new GridLayout(0,1));
JPanel temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
@ -186,7 +186,7 @@ class ConvertDialog extends JDialog
content.add(buttonBox);
setContentPane(content);
pack();
setSize(new Dimension(600,240));
setSize(new Dimension(760,340));
}
private void setChoices(String[] choices)
@ -241,15 +241,17 @@ class ConvertDialog extends JDialog
if (src == browseOld) {
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|| UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|| ACIP_TO_TMW.equals((String)choices.getSelectedItem())
|| WYLIE_TO_TMW.equals((String)choices.getSelectedItem()))
? acipff : rtfff);
? textFileFilter : rtfFileFilter);
} else {
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|| UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|| TMW_TO_ACIP_TEXT.equals((String)choices.getSelectedItem())
|| TMW_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem()))
? acipff : rtfff);
? textFileFilter : rtfFileFilter);
}
if (jfc.showOpenDialog(this) != jfc.APPROVE_OPTION)
return;
@ -445,13 +447,25 @@ class ConvertDialog extends JDialog
else
oldFileDirName = oldFileDirName + File.separator;
String oldFileNameSansThingy = of.getName();
if (oldFileNameSansThingy.startsWith("TMW_")) {
if (oldFileNameSansThingy.startsWith(suggested_TO_TMW_prefix)) {
oldFileNameSansThingy
= oldFileNameSansThingy.substring("TMW_".length(),
= oldFileNameSansThingy.substring(suggested_TO_TMW_prefix.length(),
oldFileNameSansThingy.length());
} else if (oldFileNameSansThingy.startsWith("TM_")) {
} else if (oldFileNameSansThingy.startsWith(suggested_TO_TM_prefix)) {
oldFileNameSansThingy
= oldFileNameSansThingy.substring("TM_".length(),
= oldFileNameSansThingy.substring(suggested_TO_TM_prefix.length(),
oldFileNameSansThingy.length());
} else if (oldFileNameSansThingy.startsWith(suggested_TO_UNI_prefix)) {
oldFileNameSansThingy
= oldFileNameSansThingy.substring(suggested_TO_UNI_prefix.length(),
oldFileNameSansThingy.length());
} else if (oldFileNameSansThingy.startsWith(suggested_ACIP_prefix)) {
oldFileNameSansThingy
= oldFileNameSansThingy.substring(suggested_ACIP_prefix.length(),
oldFileNameSansThingy.length());
} else if (oldFileNameSansThingy.startsWith(suggested_WYLIE_prefix)) {
oldFileNameSansThingy
= oldFileNameSansThingy.substring(suggested_WYLIE_prefix.length(),
oldFileNameSansThingy.length());
} else if (oldFileNameSansThingy.startsWith("TMW")) {
oldFileNameSansThingy
@ -481,8 +495,11 @@ class ConvertDialog extends JDialog
newFileNamePrefix = "TMW_to_same_TMW__";
newFileNameExtension = ".RTF";
} else { // conversion mode
if (TMW_TO_WYLIE == ct) {
if (TMW_TO_WYLIE == ct
|| UNI_TO_WYLIE_TEXT == ct) {
newFileNamePrefix = suggested_WYLIE_prefix;
if (UNI_TO_WYLIE_TEXT == ct)
newFileNameExtension = ".TXT";
} else if (TMW_TO_WYLIE_TEXT == ct) {
newFileNamePrefix = suggested_WYLIE_prefix;
newFileNameExtension = ".TXT";
@ -531,8 +548,7 @@ class ConvertDialog extends JDialog
}
}
// TODO(DLC)[EWTS->Tibetan]: we use for wylie (ewts) too...
public class ACIPFileFilter extends javax.swing.filechooser.FileFilter
public class TextFileFilter extends javax.swing.filechooser.FileFilter
{
public boolean accept(File f)
{

View file

@ -24,6 +24,7 @@ package org.thdl.tib.input;
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
interface FontConverterConstants
{
final String UNI_TO_WYLIE_TEXT = "Unicode to Wylie (UTF-8 Text->Text)";
final String WYLIE_TO_UNI_TEXT = "Wylie to Unicode (Text->Text)";
final String WYLIE_TO_TMW = "Wylie to TMW (Text->RTF)";
final String TMW_TO_SAME_TMW = "TMW to the same TMW (for testing only) (RTF->RTF)";
@ -60,6 +61,7 @@ interface FontConverterConstants
};
final String[] DEBUG_CHOICES = new String[] {
UNI_TO_WYLIE_TEXT,
TMW_TO_SAME_TMW,
WYLIE_TO_UNI_TEXT,
WYLIE_TO_TMW,

View file

@ -18,10 +18,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.input;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.util.ArrayList;
@ -31,6 +33,7 @@ import javax.swing.text.StyleConstants;
import javax.swing.text.rtf.RTFEditorKit;
import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.reverter.Converter;
import org.thdl.tib.text.ttt.ACIPTraits;
import org.thdl.tib.text.ttt.EWTSTraits;
import org.thdl.tib.text.ttt.TConverter;
@ -89,6 +92,7 @@ public class TibetanConverter implements FontConverterConstants {
boolean convertToWylieTextMode = false;
boolean convertToACIPRTFMode = false;
boolean convertToACIPTextMode = false;
boolean convertUniToWylieTextMode = false;
boolean findSomeNonTMWMode = false;
boolean findAllNonTMWMode = false;
boolean findSomeNonTMMode = false;
@ -123,6 +127,8 @@ public class TibetanConverter implements FontConverterConstants {
= args[numArgs - 2].equals("--tmw-to-tmw-for-testing"))
|| (convertToTMMode
= args[numArgs - 2].equals("--to-tibetan-machine"))
|| (convertUniToWylieTextMode
= args[numArgs - 2].equals("--utf8-text-to-ewts-text"))
|| (convertToTMWMode
= args[numArgs - 2].equals("--to-tibetan-machine-web"))
|| (convertACIPToUniMode
@ -224,7 +230,8 @@ public class TibetanConverter implements FontConverterConstants {
out.println("are in your document waiting for your personal attention,");
out.println("43 if not even one glyph found was eligible for this conversion, which means");
out.println("that you probably selected the wrong conversion or the wrong document, or ");
out.println("nonzero otherwise.");
out.println("nonzero on some other error.");
// TODO(dchandler): describe 47 48 50 etc.
out.println("");
out.println("You may find it helpful to use `--find-some-non-tmw' mode (or");
out.println("`--find-some-non-tm' mode for Tibetan Machine input) before doing a");
@ -266,6 +273,8 @@ public class TibetanConverter implements FontConverterConstants {
conversionTag = TMW_TO_WYLIE;
} else if (convertToWylieTextMode) {
conversionTag = TMW_TO_WYLIE_TEXT;
} else if (convertUniToWylieTextMode) {
conversionTag = UNI_TO_WYLIE_TEXT;
} else if (convertToACIPRTFMode) {
conversionTag = TMW_TO_ACIP;
} else if (convertToACIPTextMode) {
@ -320,8 +329,36 @@ public class TibetanConverter implements FontConverterConstants {
static int reallyConvert(InputStream in, PrintStream out, String ct,
String warningLevel, boolean shortMessages,
boolean colors) {
if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
|| WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
if (UNI_TO_WYLIE_TEXT == ct) {
try {
String uniText;
{
// TODO(dchandler): use, here and elsewhere in the
// codebase,
// org.apache.commons.io.IOUtils.toString(InputStream,
// encoding)
StringBuffer s = new StringBuffer();
char ch[] = new char[8192];
BufferedReader bin
= new BufferedReader(new InputStreamReader(in,
"UTF-8"));
int amt;
while (-1 != (amt = bin.read(ch))) {
s.append(ch, 0, amt);
}
bin.close();
uniText = s.toString();
}
StringBuffer errors = new StringBuffer();
String ewtsText = Converter.convertToEwts(uniText, errors);
// TODO(dchandler): is 51 the right choice?
return (errors.length() > 0) ? 51 : 0;
} catch (IOException e) {
// TODO(dchandler): print it? where to?
return 48;
}
} else if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
|| WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
try {
ArrayList al
= ((ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct)
@ -364,6 +401,7 @@ public class TibetanConverter implements FontConverterConstants {
else
return 0;
} catch (IOException e) {
// TODO(dchandler): print it? where to?
return 48;
}
} else {