The scaffolding for a Unicode->EWTS reverter. No guts yet.
This commit is contained in:
parent
ebc11a3425
commit
00afd75362
9 changed files with 282 additions and 16 deletions
|
@ -97,14 +97,14 @@ class ConvertDialog extends JDialog
|
|||
|| choices.getSelectedItem() == WYLIE_TO_UNI_TEXT);
|
||||
}
|
||||
|
||||
private javax.swing.filechooser.FileFilter acipff, rtfff;
|
||||
private javax.swing.filechooser.FileFilter textFileFilter, rtfFileFilter;
|
||||
|
||||
private void init()
|
||||
{
|
||||
jfc = new JFileChooser(controller.getDefaultDirectory());
|
||||
jfc.setDialogTitle(LOCATE_FILE);
|
||||
jfc.addChoosableFileFilter(acipff = new ACIPFileFilter());
|
||||
jfc.addChoosableFileFilter(rtfff = new RTFFileFilter());
|
||||
jfc.addChoosableFileFilter(textFileFilter = new TextFileFilter());
|
||||
jfc.addChoosableFileFilter(rtfFileFilter = new RTFFileFilter());
|
||||
|
||||
content = new JPanel(new GridLayout(0,1));
|
||||
JPanel temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
|
||||
|
@ -186,7 +186,7 @@ class ConvertDialog extends JDialog
|
|||
content.add(buttonBox);
|
||||
setContentPane(content);
|
||||
pack();
|
||||
setSize(new Dimension(600,240));
|
||||
setSize(new Dimension(760,340));
|
||||
}
|
||||
|
||||
private void setChoices(String[] choices)
|
||||
|
@ -241,15 +241,17 @@ class ConvertDialog extends JDialog
|
|||
if (src == browseOld) {
|
||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| ACIP_TO_TMW.equals((String)choices.getSelectedItem())
|
||||
|| WYLIE_TO_TMW.equals((String)choices.getSelectedItem()))
|
||||
? acipff : rtfff);
|
||||
? textFileFilter : rtfFileFilter);
|
||||
} else {
|
||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| TMW_TO_ACIP_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| TMW_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem()))
|
||||
? acipff : rtfff);
|
||||
? textFileFilter : rtfFileFilter);
|
||||
}
|
||||
if (jfc.showOpenDialog(this) != jfc.APPROVE_OPTION)
|
||||
return;
|
||||
|
@ -445,13 +447,25 @@ class ConvertDialog extends JDialog
|
|||
else
|
||||
oldFileDirName = oldFileDirName + File.separator;
|
||||
String oldFileNameSansThingy = of.getName();
|
||||
if (oldFileNameSansThingy.startsWith("TMW_")) {
|
||||
if (oldFileNameSansThingy.startsWith(suggested_TO_TMW_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring("TMW_".length(),
|
||||
= oldFileNameSansThingy.substring(suggested_TO_TMW_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith("TM_")) {
|
||||
} else if (oldFileNameSansThingy.startsWith(suggested_TO_TM_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring("TM_".length(),
|
||||
= oldFileNameSansThingy.substring(suggested_TO_TM_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith(suggested_TO_UNI_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring(suggested_TO_UNI_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith(suggested_ACIP_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring(suggested_ACIP_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith(suggested_WYLIE_prefix)) {
|
||||
oldFileNameSansThingy
|
||||
= oldFileNameSansThingy.substring(suggested_WYLIE_prefix.length(),
|
||||
oldFileNameSansThingy.length());
|
||||
} else if (oldFileNameSansThingy.startsWith("TMW")) {
|
||||
oldFileNameSansThingy
|
||||
|
@ -481,8 +495,11 @@ class ConvertDialog extends JDialog
|
|||
newFileNamePrefix = "TMW_to_same_TMW__";
|
||||
newFileNameExtension = ".RTF";
|
||||
} else { // conversion mode
|
||||
if (TMW_TO_WYLIE == ct) {
|
||||
if (TMW_TO_WYLIE == ct
|
||||
|| UNI_TO_WYLIE_TEXT == ct) {
|
||||
newFileNamePrefix = suggested_WYLIE_prefix;
|
||||
if (UNI_TO_WYLIE_TEXT == ct)
|
||||
newFileNameExtension = ".TXT";
|
||||
} else if (TMW_TO_WYLIE_TEXT == ct) {
|
||||
newFileNamePrefix = suggested_WYLIE_prefix;
|
||||
newFileNameExtension = ".TXT";
|
||||
|
@ -531,8 +548,7 @@ class ConvertDialog extends JDialog
|
|||
}
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: we use for wylie (ewts) too...
|
||||
public class ACIPFileFilter extends javax.swing.filechooser.FileFilter
|
||||
public class TextFileFilter extends javax.swing.filechooser.FileFilter
|
||||
{
|
||||
public boolean accept(File f)
|
||||
{
|
||||
|
|
|
@ -24,6 +24,7 @@ package org.thdl.tib.input;
|
|||
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
|
||||
interface FontConverterConstants
|
||||
{
|
||||
final String UNI_TO_WYLIE_TEXT = "Unicode to Wylie (UTF-8 Text->Text)";
|
||||
final String WYLIE_TO_UNI_TEXT = "Wylie to Unicode (Text->Text)";
|
||||
final String WYLIE_TO_TMW = "Wylie to TMW (Text->RTF)";
|
||||
final String TMW_TO_SAME_TMW = "TMW to the same TMW (for testing only) (RTF->RTF)";
|
||||
|
@ -60,6 +61,7 @@ interface FontConverterConstants
|
|||
};
|
||||
|
||||
final String[] DEBUG_CHOICES = new String[] {
|
||||
UNI_TO_WYLIE_TEXT,
|
||||
TMW_TO_SAME_TMW,
|
||||
WYLIE_TO_UNI_TEXT,
|
||||
WYLIE_TO_TMW,
|
||||
|
|
|
@ -18,10 +18,12 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.input;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
|
@ -31,6 +33,7 @@ import javax.swing.text.StyleConstants;
|
|||
import javax.swing.text.rtf.RTFEditorKit;
|
||||
|
||||
import org.thdl.tib.text.TibetanDocument;
|
||||
import org.thdl.tib.text.reverter.Converter;
|
||||
import org.thdl.tib.text.ttt.ACIPTraits;
|
||||
import org.thdl.tib.text.ttt.EWTSTraits;
|
||||
import org.thdl.tib.text.ttt.TConverter;
|
||||
|
@ -89,6 +92,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
boolean convertToWylieTextMode = false;
|
||||
boolean convertToACIPRTFMode = false;
|
||||
boolean convertToACIPTextMode = false;
|
||||
boolean convertUniToWylieTextMode = false;
|
||||
boolean findSomeNonTMWMode = false;
|
||||
boolean findAllNonTMWMode = false;
|
||||
boolean findSomeNonTMMode = false;
|
||||
|
@ -123,6 +127,8 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
= args[numArgs - 2].equals("--tmw-to-tmw-for-testing"))
|
||||
|| (convertToTMMode
|
||||
= args[numArgs - 2].equals("--to-tibetan-machine"))
|
||||
|| (convertUniToWylieTextMode
|
||||
= args[numArgs - 2].equals("--utf8-text-to-ewts-text"))
|
||||
|| (convertToTMWMode
|
||||
= args[numArgs - 2].equals("--to-tibetan-machine-web"))
|
||||
|| (convertACIPToUniMode
|
||||
|
@ -224,7 +230,8 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
out.println("are in your document waiting for your personal attention,");
|
||||
out.println("43 if not even one glyph found was eligible for this conversion, which means");
|
||||
out.println("that you probably selected the wrong conversion or the wrong document, or ");
|
||||
out.println("nonzero otherwise.");
|
||||
out.println("nonzero on some other error.");
|
||||
// TODO(dchandler): describe 47 48 50 etc.
|
||||
out.println("");
|
||||
out.println("You may find it helpful to use `--find-some-non-tmw' mode (or");
|
||||
out.println("`--find-some-non-tm' mode for Tibetan Machine input) before doing a");
|
||||
|
@ -266,6 +273,8 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
conversionTag = TMW_TO_WYLIE;
|
||||
} else if (convertToWylieTextMode) {
|
||||
conversionTag = TMW_TO_WYLIE_TEXT;
|
||||
} else if (convertUniToWylieTextMode) {
|
||||
conversionTag = UNI_TO_WYLIE_TEXT;
|
||||
} else if (convertToACIPRTFMode) {
|
||||
conversionTag = TMW_TO_ACIP;
|
||||
} else if (convertToACIPTextMode) {
|
||||
|
@ -320,8 +329,36 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
static int reallyConvert(InputStream in, PrintStream out, String ct,
|
||||
String warningLevel, boolean shortMessages,
|
||||
boolean colors) {
|
||||
if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
|
||||
|| WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
|
||||
if (UNI_TO_WYLIE_TEXT == ct) {
|
||||
try {
|
||||
String uniText;
|
||||
{
|
||||
// TODO(dchandler): use, here and elsewhere in the
|
||||
// codebase,
|
||||
// org.apache.commons.io.IOUtils.toString(InputStream,
|
||||
// encoding)
|
||||
StringBuffer s = new StringBuffer();
|
||||
char ch[] = new char[8192];
|
||||
BufferedReader bin
|
||||
= new BufferedReader(new InputStreamReader(in,
|
||||
"UTF-8"));
|
||||
int amt;
|
||||
while (-1 != (amt = bin.read(ch))) {
|
||||
s.append(ch, 0, amt);
|
||||
}
|
||||
bin.close();
|
||||
uniText = s.toString();
|
||||
}
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String ewtsText = Converter.convertToEwts(uniText, errors);
|
||||
// TODO(dchandler): is 51 the right choice?
|
||||
return (errors.length() > 0) ? 51 : 0;
|
||||
} catch (IOException e) {
|
||||
// TODO(dchandler): print it? where to?
|
||||
return 48;
|
||||
}
|
||||
} else if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
|
||||
|| WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
|
||||
try {
|
||||
ArrayList al
|
||||
= ((ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct)
|
||||
|
@ -364,6 +401,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
else
|
||||
return 0;
|
||||
} catch (IOException e) {
|
||||
// TODO(dchandler): print it? where to?
|
||||
return 48;
|
||||
}
|
||||
} else {
|
||||
|
|
38
source/org/thdl/tib/text/reverter/Converter.java
Normal file
38
source/org/thdl/tib/text/reverter/Converter.java
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text.reverter;
|
||||
|
||||
/**
 * Static methods for converting Tibetan Unicode to EWTS transliteration
 * (TODO(dchandler): and to ACIP).
 *
 * <p>This class is scaffolding only: the conversion itself has not been
 * written yet, so {@link #convertToEwts(String, StringBuffer)} always
 * throws.
 *
 * @author David Chandler
 */
public class Converter {
    /**
     * Static methods provide all the fun!  This class is never meant to
     * be instantiated.
     */
    private Converter() {
        // AssertionError is still a java.lang.Error, so anything that
        // relied on the old behavior keeps working, but it states the
        // intent ("this must never run") more precisely than a raw Error.
        throw new AssertionError("There's no point in instantiating this class.");
    }

    /**
     * Converts Tibetan Unicode to EWTS transliteration.
     *
     * <p>Intended contract: if <code>errors</code> is non-null, error
     * messages are appended to it.  (Errors are always inline in the
     * returned text.)
     *
     * <p>Not implemented yet; every call currently fails.
     *
     * @param unicode the Tibetan Unicode text to convert
     * @param errors  buffer that will receive error messages, or null if
     *                the caller does not want them
     * @return the EWTS transliteration (once implemented)
     * @throws UnsupportedOperationException always, until the conversion
     *         is implemented
     */
    public static String convertToEwts(String unicode,
                                       StringBuffer errors /* DLC: use it */) {
        // An unchecked exception rather than java.lang.Error: callers
        // (GUI, command line, XSLT processors) that guard with
        // catch (Exception e) can then report the missing feature
        // instead of having the thread die.
        throw new UnsupportedOperationException("DLC not yet");
    }
}
|
55
source/org/thdl/tib/text/reverter/ConverterTest.java
Normal file
55
source/org/thdl/tib/text/reverter/ConverterTest.java
Normal file
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text.reverter;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.thdl.util.ThdlOptions;
|
||||
import org.thdl.tib.text.ttt.ErrorsAndWarnings;
|
||||
|
||||
/** Tests the Converter class.
|
||||
*
|
||||
* @author David Chandler */
|
||||
public class ConverterTest extends TestCase {
|
||||
|
||||
/** Invokes a text UI and runs all this class's tests. */
|
||||
public static void main(String[] args) {
|
||||
junit.textui.TestRunner.run(ConverterTest.class);
|
||||
}
|
||||
|
||||
protected void setUp() {
|
||||
// We don't want to use options.txt:
|
||||
ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
|
||||
|
||||
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
|
||||
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.507", "Most");
|
||||
ErrorsAndWarnings.setupSeverityMap();
|
||||
|
||||
// We don't want to load the TM or TMW font files ourselves:
|
||||
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
|
||||
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
|
||||
ThdlOptions.setUserPreference("thdl.debug", true);
|
||||
}
|
||||
|
||||
public ConverterTest() { }
|
||||
|
||||
public void testUnicodeToEwts() {
|
||||
assertEquals(Converter.convertToEwts("\u0f40", null), "ka");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text.reverter;
|
||||
|
||||
/** A class for use in XSL transformations that converts Unicode to
|
||||
* EWTS or ACIP transliteration. Note that the syntax for calling
|
||||
* Java extensions from XSL is vendor-specific; for more details,
|
||||
* please consult the documentation for the XSLT processor you use,
|
||||
* for example Saxon or Xalan-Java.
|
||||
* @author David Chandler
|
||||
*/
|
||||
public class UnicodeToTranslitForXslt {
|
||||
/** Static methods provide all the fun! */
|
||||
private UnicodeToTranslitForXslt() {
|
||||
throw new Error("There's no point in instantiating this class.");
|
||||
}
|
||||
|
||||
/** Converts Tibetan Unicode to EWTS transliteration. */
|
||||
public static String unicodeToEwts(String unicode) {
|
||||
return Converter.convertToEwts(unicode, null);
|
||||
}
|
||||
/** Converts Tibetan Unicode to ACIP transliteration. */
|
||||
public static String unicodeToAcip(String unicode) {
|
||||
throw new Error("DLC: not yet");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text.reverter;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.thdl.util.ThdlOptions;
|
||||
import org.thdl.tib.text.ttt.ErrorsAndWarnings;
|
||||
|
||||
/** Tests the UnicodeToTranslitForXslt class.
|
||||
*
|
||||
* @author David Chandler */
|
||||
public class UnicodeToTranslitForXsltTest extends TestCase {
|
||||
|
||||
/** Invokes a text UI and runs all this class's tests. */
|
||||
public static void main(String[] args) {
|
||||
junit.textui.TestRunner.run(UnicodeToTranslitForXsltTest.class);
|
||||
}
|
||||
|
||||
protected void setUp() {
|
||||
// We don't want to use options.txt:
|
||||
ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
|
||||
|
||||
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
|
||||
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.507", "Most");
|
||||
ErrorsAndWarnings.setupSeverityMap();
|
||||
|
||||
// We don't want to load the TM or TMW font files ourselves:
|
||||
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
|
||||
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
|
||||
ThdlOptions.setUserPreference("thdl.debug", true);
|
||||
}
|
||||
|
||||
public UnicodeToTranslitForXsltTest() { }
|
||||
|
||||
public void testUnicodeToEwts() {
|
||||
assertEquals(UnicodeToTranslitForXslt.unicodeToEwts("\u0f40"), "ka");
|
||||
assertEquals(UnicodeToTranslitForXslt.unicodeToEwts("\u0f56\u0f62\u0f4f\u0f42\u0f66\u0f0b"), "brtags ");
|
||||
}
|
||||
|
||||
public void testUnicodeToAcip() {
|
||||
assertEquals(UnicodeToTranslitForXslt.unicodeToEwts("\u0f40"), "KA");
|
||||
assertEquals(UnicodeToTranslitForXslt.unicodeToEwts("\u0f56\u0f62\u0f4f\u0f42\u0f66\u0f0b"), "BRTAGS ");
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue