Add the scaffolding for a Unicode->EWTS reverter. The conversion logic itself is not implemented yet.
This commit is contained in:
parent
ebc11a3425
commit
00afd75362
9 changed files with 282 additions and 16 deletions
|
@ -97,14 +97,14 @@ class ConvertDialog extends JDialog
|
|||
|| choices.getSelectedItem() == WYLIE_TO_UNI_TEXT);
|
||||
}
|
||||
|
||||
private javax.swing.filechooser.FileFilter acipff, rtfff;
|
||||
private javax.swing.filechooser.FileFilter textFileFilter, rtfFileFilter;
|
||||
|
||||
private void init()
|
||||
{
|
||||
jfc = new JFileChooser(controller.getDefaultDirectory());
|
||||
jfc.setDialogTitle(LOCATE_FILE);
|
||||
jfc.addChoosableFileFilter(acipff = new ACIPFileFilter());
|
||||
jfc.addChoosableFileFilter(rtfff = new RTFFileFilter());
|
||||
jfc.addChoosableFileFilter(textFileFilter = new TextFileFilter());
|
||||
jfc.addChoosableFileFilter(rtfFileFilter = new RTFFileFilter());
|
||||
|
||||
content = new JPanel(new GridLayout(0,1));
|
||||
JPanel temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
|
||||
|
@ -186,7 +186,7 @@ class ConvertDialog extends JDialog
|
|||
content.add(buttonBox);
|
||||
setContentPane(content);
|
||||
pack();
|
||||
setSize(new Dimension(600,240));
|
||||
setSize(new Dimension(760,340));
|
||||
}
|
||||
|
||||
private void setChoices(String[] choices)
|
||||
|
@ -241,15 +241,17 @@ class ConvertDialog extends JDialog
|
|||
if (src == browseOld) {
|
||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| ACIP_TO_TMW.equals((String)choices.getSelectedItem())
|
||||
|| WYLIE_TO_TMW.equals((String)choices.getSelectedItem()))
|
||||
? acipff : rtfff);
|
||||
? textFileFilter : rtfFileFilter);
|
||||
} else {
|
||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| TMW_TO_ACIP_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| TMW_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem()))
|
||||
? acipff : rtfff);
|
||||
? textFileFilter : rtfFileFilter);
|
||||
}
|
||||
if (jfc.showOpenDialog(this) != jfc.APPROVE_OPTION)
|
||||
return;
|
||||
|
@ -445,13 +447,25 @@ class ConvertDialog extends JDialog
|
|||
else
|
||||
oldFileDirName = oldFileDirName + File.separator;
|
||||
String oldFileNameSansThingy = of.getName();
|
||||
if (oldFileNameSansThingy.startsWith("TMW_")) {
|
||||
if (oldFileNameSansThingy.startsWith(suggested_TO_TMW_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring("TMW_".length(),
|
||||
= oldFileNameSansThingy.substring(suggested_TO_TMW_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith("TM_")) {
|
||||
} else if (oldFileNameSansThingy.startsWith(suggested_TO_TM_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring("TM_".length(),
|
||||
= oldFileNameSansThingy.substring(suggested_TO_TM_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith(suggested_TO_UNI_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring(suggested_TO_UNI_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith(suggested_ACIP_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring(suggested_ACIP_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith(suggested_WYLIE_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring(suggested_WYLIE_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith("TMW")) {
|
||||
oldFileNameSansThingy
|
||||
|
@ -481,8 +495,11 @@ class ConvertDialog extends JDialog
|
|||
newFileNamePrefix = "TMW_to_same_TMW__";
|
||||
newFileNameExtension = ".RTF";
|
||||
} else { // conversion mode
|
||||
if (TMW_TO_WYLIE == ct) {
|
||||
if (TMW_TO_WYLIE == ct
|
||||
|| UNI_TO_WYLIE_TEXT == ct) {
|
||||
newFileNamePrefix = suggested_WYLIE_prefix;
|
||||
if (UNI_TO_WYLIE_TEXT == ct)
|
||||
newFileNameExtension = ".TXT";
|
||||
} else if (TMW_TO_WYLIE_TEXT == ct) {
|
||||
newFileNamePrefix = suggested_WYLIE_prefix;
|
||||
newFileNameExtension = ".TXT";
|
||||
|
@ -531,8 +548,7 @@ class ConvertDialog extends JDialog
|
|||
}
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: we use for wylie (ewts) too...
|
||||
public class ACIPFileFilter extends javax.swing.filechooser.FileFilter
|
||||
public class TextFileFilter extends javax.swing.filechooser.FileFilter
|
||||
{
|
||||
public boolean accept(File f)
|
||||
{
|
||||
|
|
|
@ -24,6 +24,7 @@ package org.thdl.tib.input;
|
|||
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
|
||||
interface FontConverterConstants
|
||||
{
|
||||
final String UNI_TO_WYLIE_TEXT = "Unicode to Wylie (UTF-8 Text->Text)";
|
||||
final String WYLIE_TO_UNI_TEXT = "Wylie to Unicode (Text->Text)";
|
||||
final String WYLIE_TO_TMW = "Wylie to TMW (Text->RTF)";
|
||||
final String TMW_TO_SAME_TMW = "TMW to the same TMW (for testing only) (RTF->RTF)";
|
||||
|
@ -60,6 +61,7 @@ interface FontConverterConstants
|
|||
};
|
||||
|
||||
final String[] DEBUG_CHOICES = new String[] {
|
||||
UNI_TO_WYLIE_TEXT,
|
||||
TMW_TO_SAME_TMW,
|
||||
WYLIE_TO_UNI_TEXT,
|
||||
WYLIE_TO_TMW,
|
||||
|
|
|
@ -18,10 +18,12 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.input;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
|
@ -31,6 +33,7 @@ import javax.swing.text.StyleConstants;
|
|||
import javax.swing.text.rtf.RTFEditorKit;
|
||||
|
||||
import org.thdl.tib.text.TibetanDocument;
|
||||
import org.thdl.tib.text.reverter.Converter;
|
||||
import org.thdl.tib.text.ttt.ACIPTraits;
|
||||
import org.thdl.tib.text.ttt.EWTSTraits;
|
||||
import org.thdl.tib.text.ttt.TConverter;
|
||||
|
@ -89,6 +92,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
boolean convertToWylieTextMode = false;
|
||||
boolean convertToACIPRTFMode = false;
|
||||
boolean convertToACIPTextMode = false;
|
||||
boolean convertUniToWylieTextMode = false;
|
||||
boolean findSomeNonTMWMode = false;
|
||||
boolean findAllNonTMWMode = false;
|
||||
boolean findSomeNonTMMode = false;
|
||||
|
@ -123,6 +127,8 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
= args[numArgs - 2].equals("--tmw-to-tmw-for-testing"))
|
||||
|| (convertToTMMode
|
||||
= args[numArgs - 2].equals("--to-tibetan-machine"))
|
||||
|| (convertUniToWylieTextMode
|
||||
= args[numArgs - 2].equals("--utf8-text-to-ewts-text"))
|
||||
|| (convertToTMWMode
|
||||
= args[numArgs - 2].equals("--to-tibetan-machine-web"))
|
||||
|| (convertACIPToUniMode
|
||||
|
@ -224,7 +230,8 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
out.println("are in your document waiting for your personal attention,");
|
||||
out.println("43 if not even one glyph found was eligible for this conversion, which means");
|
||||
out.println("that you probably selected the wrong conversion or the wrong document, or ");
|
||||
out.println("nonzero otherwise.");
|
||||
out.println("nonzero on some other error.");
|
||||
// TODO(dchandler): describe 47 48 50 etc.
|
||||
out.println("");
|
||||
out.println("You may find it helpful to use `--find-some-non-tmw' mode (or");
|
||||
out.println("`--find-some-non-tm' mode for Tibetan Machine input) before doing a");
|
||||
|
@ -266,6 +273,8 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
conversionTag = TMW_TO_WYLIE;
|
||||
} else if (convertToWylieTextMode) {
|
||||
conversionTag = TMW_TO_WYLIE_TEXT;
|
||||
} else if (convertUniToWylieTextMode) {
|
||||
conversionTag = UNI_TO_WYLIE_TEXT;
|
||||
} else if (convertToACIPRTFMode) {
|
||||
conversionTag = TMW_TO_ACIP;
|
||||
} else if (convertToACIPTextMode) {
|
||||
|
@ -320,8 +329,36 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
static int reallyConvert(InputStream in, PrintStream out, String ct,
|
||||
String warningLevel, boolean shortMessages,
|
||||
boolean colors) {
|
||||
if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
|
||||
|| WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
|
||||
if (UNI_TO_WYLIE_TEXT == ct) {
|
||||
try {
|
||||
String uniText;
|
||||
{
|
||||
// TODO(dchandler): use, here and elsewhere in the
|
||||
// codebase,
|
||||
// org.apache.commons.io.IOUtils.toString(InputStream,
|
||||
// encoding)
|
||||
StringBuffer s = new StringBuffer();
|
||||
char ch[] = new char[8192];
|
||||
BufferedReader bin
|
||||
= new BufferedReader(new InputStreamReader(in,
|
||||
"UTF-8"));
|
||||
int amt;
|
||||
while (-1 != (amt = bin.read(ch))) {
|
||||
s.append(ch, 0, amt);
|
||||
}
|
||||
bin.close();
|
||||
uniText = s.toString();
|
||||
}
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String ewtsText = Converter.convertToEwts(uniText, errors);
|
||||
// TODO(dchandler): is 51 the right choice?
|
||||
return (errors.length() > 0) ? 51 : 0;
|
||||
} catch (IOException e) {
|
||||
// TODO(dchandler): print it? where to?
|
||||
return 48;
|
||||
}
|
||||
} else if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
|
||||
|| WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
|
||||
try {
|
||||
ArrayList al
|
||||
= ((ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct)
|
||||
|
@ -364,6 +401,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
else
|
||||
return 0;
|
||||
} catch (IOException e) {
|
||||
// TODO(dchandler): print it? where to?
|
||||
return 48;
|
||||
}
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue