diff --git a/build.xml b/build.xml
index f58524c..9d03e01 100644
--- a/build.xml
+++ b/build.xml
@@ -479,6 +479,16 @@ the jvm starting tomcat:
description="compiles all JUnit test cases that can be compiled in the present CLASSPATH (NB that this distinction is just wishful thinking for now because we have such weak test coverage at this point)" >
+
+
+
+
+
+
+
+
+
diff --git a/source/org/thdl/tib/input/ConvertDialog.java b/source/org/thdl/tib/input/ConvertDialog.java
index 1ec3e33..7b13237 100644
--- a/source/org/thdl/tib/input/ConvertDialog.java
+++ b/source/org/thdl/tib/input/ConvertDialog.java
@@ -97,14 +97,14 @@ class ConvertDialog extends JDialog
|| choices.getSelectedItem() == WYLIE_TO_UNI_TEXT);
}
- private javax.swing.filechooser.FileFilter acipff, rtfff;
+ private javax.swing.filechooser.FileFilter textFileFilter, rtfFileFilter;
private void init()
{
jfc = new JFileChooser(controller.getDefaultDirectory());
jfc.setDialogTitle(LOCATE_FILE);
- jfc.addChoosableFileFilter(acipff = new ACIPFileFilter());
- jfc.addChoosableFileFilter(rtfff = new RTFFileFilter());
+ jfc.addChoosableFileFilter(textFileFilter = new TextFileFilter());
+ jfc.addChoosableFileFilter(rtfFileFilter = new RTFFileFilter());
content = new JPanel(new GridLayout(0,1));
JPanel temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
@@ -186,7 +186,7 @@ class ConvertDialog extends JDialog
content.add(buttonBox);
setContentPane(content);
pack();
- setSize(new Dimension(600,240));
+ setSize(new Dimension(760,340));
}
private void setChoices(String[] choices)
@@ -241,15 +241,17 @@ class ConvertDialog extends JDialog
if (src == browseOld) {
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
+ || UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|| ACIP_TO_TMW.equals((String)choices.getSelectedItem())
|| WYLIE_TO_TMW.equals((String)choices.getSelectedItem()))
- ? acipff : rtfff);
+ ? textFileFilter : rtfFileFilter);
} else {
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
+ || UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|| TMW_TO_ACIP_TEXT.equals((String)choices.getSelectedItem())
|| TMW_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem()))
- ? acipff : rtfff);
+ ? textFileFilter : rtfFileFilter);
}
if (jfc.showOpenDialog(this) != jfc.APPROVE_OPTION)
return;
@@ -445,13 +447,25 @@ class ConvertDialog extends JDialog
else
oldFileDirName = oldFileDirName + File.separator;
String oldFileNameSansThingy = of.getName();
- if (oldFileNameSansThingy.startsWith("TMW_")) {
+ if (oldFileNameSansThingy.startsWith(suggested_TO_TMW_prefix)) {
oldFileNameSansThingy
- = oldFileNameSansThingy.substring("TMW_".length(),
+ = oldFileNameSansThingy.substring(suggested_TO_TMW_prefix.length(),
oldFileNameSansThingy.length());
- } else if (oldFileNameSansThingy.startsWith("TM_")) {
+ } else if (oldFileNameSansThingy.startsWith(suggested_TO_TM_prefix)) {
oldFileNameSansThingy
- = oldFileNameSansThingy.substring("TM_".length(),
+ = oldFileNameSansThingy.substring(suggested_TO_TM_prefix.length(),
+ oldFileNameSansThingy.length());
+ } else if (oldFileNameSansThingy.startsWith(suggested_TO_UNI_prefix)) {
+ oldFileNameSansThingy
+ = oldFileNameSansThingy.substring(suggested_TO_UNI_prefix.length(),
+ oldFileNameSansThingy.length());
+ } else if (oldFileNameSansThingy.startsWith(suggested_ACIP_prefix)) {
+ oldFileNameSansThingy
+ = oldFileNameSansThingy.substring(suggested_ACIP_prefix.length(),
+ oldFileNameSansThingy.length());
+ } else if (oldFileNameSansThingy.startsWith(suggested_WYLIE_prefix)) {
+ oldFileNameSansThingy
+ = oldFileNameSansThingy.substring(suggested_WYLIE_prefix.length(),
oldFileNameSansThingy.length());
} else if (oldFileNameSansThingy.startsWith("TMW")) {
oldFileNameSansThingy
@@ -481,8 +495,11 @@ class ConvertDialog extends JDialog
newFileNamePrefix = "TMW_to_same_TMW__";
newFileNameExtension = ".RTF";
} else { // conversion mode
- if (TMW_TO_WYLIE == ct) {
+ if (TMW_TO_WYLIE == ct
+ || UNI_TO_WYLIE_TEXT == ct) {
newFileNamePrefix = suggested_WYLIE_prefix;
+ if (UNI_TO_WYLIE_TEXT == ct)
+ newFileNameExtension = ".TXT";
} else if (TMW_TO_WYLIE_TEXT == ct) {
newFileNamePrefix = suggested_WYLIE_prefix;
newFileNameExtension = ".TXT";
@@ -531,8 +548,7 @@ class ConvertDialog extends JDialog
}
}
- // TODO(DLC)[EWTS->Tibetan]: we use for wylie (ewts) too...
- public class ACIPFileFilter extends javax.swing.filechooser.FileFilter
+ public class TextFileFilter extends javax.swing.filechooser.FileFilter
{
public boolean accept(File f)
{
diff --git a/source/org/thdl/tib/input/FontConverterConstants.java b/source/org/thdl/tib/input/FontConverterConstants.java
index b5734ed..a3b5b35 100644
--- a/source/org/thdl/tib/input/FontConverterConstants.java
+++ b/source/org/thdl/tib/input/FontConverterConstants.java
@@ -24,6 +24,7 @@ package org.thdl.tib.input;
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
interface FontConverterConstants
{
+ final String UNI_TO_WYLIE_TEXT = "Unicode to Wylie (UTF-8 Text->Text)";
final String WYLIE_TO_UNI_TEXT = "Wylie to Unicode (Text->Text)";
final String WYLIE_TO_TMW = "Wylie to TMW (Text->RTF)";
final String TMW_TO_SAME_TMW = "TMW to the same TMW (for testing only) (RTF->RTF)";
@@ -60,6 +61,7 @@ interface FontConverterConstants
};
final String[] DEBUG_CHOICES = new String[] {
+ UNI_TO_WYLIE_TEXT,
TMW_TO_SAME_TMW,
WYLIE_TO_UNI_TEXT,
WYLIE_TO_TMW,
diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java
index 4713f90..7def541 100644
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@@ -18,10 +18,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.input;
+import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.util.ArrayList;
@@ -31,6 +33,7 @@ import javax.swing.text.StyleConstants;
import javax.swing.text.rtf.RTFEditorKit;
import org.thdl.tib.text.TibetanDocument;
+import org.thdl.tib.text.reverter.Converter;
import org.thdl.tib.text.ttt.ACIPTraits;
import org.thdl.tib.text.ttt.EWTSTraits;
import org.thdl.tib.text.ttt.TConverter;
@@ -89,6 +92,7 @@ public class TibetanConverter implements FontConverterConstants {
boolean convertToWylieTextMode = false;
boolean convertToACIPRTFMode = false;
boolean convertToACIPTextMode = false;
+ boolean convertUniToWylieTextMode = false;
boolean findSomeNonTMWMode = false;
boolean findAllNonTMWMode = false;
boolean findSomeNonTMMode = false;
@@ -123,6 +127,8 @@ public class TibetanConverter implements FontConverterConstants {
= args[numArgs - 2].equals("--tmw-to-tmw-for-testing"))
|| (convertToTMMode
= args[numArgs - 2].equals("--to-tibetan-machine"))
+ || (convertUniToWylieTextMode
+ = args[numArgs - 2].equals("--utf8-text-to-ewts-text"))
|| (convertToTMWMode
= args[numArgs - 2].equals("--to-tibetan-machine-web"))
|| (convertACIPToUniMode
@@ -224,7 +230,8 @@ public class TibetanConverter implements FontConverterConstants {
out.println("are in your document waiting for your personal attention,");
out.println("43 if not even one glyph found was eligible for this conversion, which means");
out.println("that you probably selected the wrong conversion or the wrong document, or ");
- out.println("nonzero otherwise.");
+ out.println("nonzero on some other error.");
+ // TODO(dchandler): describe 47 48 50 etc.
out.println("");
out.println("You may find it helpful to use `--find-some-non-tmw' mode (or");
out.println("`--find-some-non-tm' mode for Tibetan Machine input) before doing a");
@@ -266,6 +273,8 @@ public class TibetanConverter implements FontConverterConstants {
conversionTag = TMW_TO_WYLIE;
} else if (convertToWylieTextMode) {
conversionTag = TMW_TO_WYLIE_TEXT;
+ } else if (convertUniToWylieTextMode) {
+ conversionTag = UNI_TO_WYLIE_TEXT;
} else if (convertToACIPRTFMode) {
conversionTag = TMW_TO_ACIP;
} else if (convertToACIPTextMode) {
@@ -320,8 +329,36 @@ public class TibetanConverter implements FontConverterConstants {
static int reallyConvert(InputStream in, PrintStream out, String ct,
String warningLevel, boolean shortMessages,
boolean colors) {
- if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
- || WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
+        if (UNI_TO_WYLIE_TEXT == ct) {
+            // Read all of the UTF-8 input, convert it to EWTS, and write
+            // the result to out (it used to be computed and then dropped).
+            try {
+                // TODO(dchandler): use, here and elsewhere in the codebase,
+                // org.apache.commons.io.IOUtils.toString(InputStream, encoding)
+                StringBuffer s = new StringBuffer();
+                char ch[] = new char[8192];
+                BufferedReader bin
+                    = new BufferedReader(new InputStreamReader(in, "UTF-8"));
+                int amt;
+                while (-1 != (amt = bin.read(ch))) {
+                    s.append(ch, 0, amt);
+                }
+                bin.close();
+                StringBuffer errors = new StringBuffer();
+                String ewtsText = Converter.convertToEwts(s.toString(), errors);
+                // Emit as UTF-8 in case inline error text is not pure ASCII.
+                BufferedWriter bw
+                    = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
+                bw.write(ewtsText);
+                bw.flush();  // flush, don't close: out was passed in to us
+                // TODO(dchandler): is 51 the right choice?
+                return (errors.length() > 0) ? 51 : 0;
+            } catch (IOException e) {
+                // TODO(dchandler): print it? where to?
+                return 48;
+            }
+ } else if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct
+ || WYLIE_TO_UNI_TEXT == ct || WYLIE_TO_TMW == ct) {
try {
ArrayList al
= ((ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct)
@@ -364,6 +401,7 @@ public class TibetanConverter implements FontConverterConstants {
else
return 0;
} catch (IOException e) {
+ // TODO(dchandler): print it? where to?
return 48;
}
} else {
diff --git a/source/org/thdl/tib/text/reverter/Converter.java b/source/org/thdl/tib/text/reverter/Converter.java
new file mode 100644
index 0000000..623112a
--- /dev/null
+++ b/source/org/thdl/tib/text/reverter/Converter.java
@@ -0,0 +1,38 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific terms governing rights and limitations under the
+License.
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
+All Rights Reserved.
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.reverter;
+
+/** Static methods for converting Tibetan Unicode text to EWTS
+ * transliteration (TODO(dchandler): and to ACIP).
+ * @author David Chandler
+ */
+public class Converter {
+ /** Not instantiable; the private constructor throws if ever called. */
+ private Converter() {
+ throw new Error("There's no point in instantiating this class.");
+ }
+
+ /** Converts Tibetan Unicode to EWTS transliteration. If errors is
+ * non-null, error messages are appended to it. (Errors are always
+ * inline.) Placeholder for now: every call throws Error. */
+ public static String convertToEwts(String unicode,
+ StringBuffer errors /* DLC: use it */) {
+ throw new Error("DLC not yet");
+ }
+}
diff --git a/source/org/thdl/tib/text/reverter/ConverterTest.java b/source/org/thdl/tib/text/reverter/ConverterTest.java
new file mode 100644
index 0000000..5c97876
--- /dev/null
+++ b/source/org/thdl/tib/text/reverter/ConverterTest.java
@@ -0,0 +1,55 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific terms governing rights and limitations under the
+License.
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
+All Rights Reserved.
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.reverter;
+
+import junit.framework.TestCase;
+
+import org.thdl.util.ThdlOptions;
+import org.thdl.tib.text.ttt.ErrorsAndWarnings;
+
+/** Tests the Converter class.
+ *
+ * @author David Chandler */
+public class ConverterTest extends TestCase {
+    /** Invokes a text UI and runs all this class's tests. */
+    public static void main(String[] args) {
+        junit.textui.TestRunner.run(ConverterTest.class);
+    }
+
+    /** Sets the ThdlOptions preferences that these tests run under. */
+    protected void setUp() {
+        // We don't want to use options.txt:
+        ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
+        ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
+        ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.507", "Most");
+        ErrorsAndWarnings.setupSeverityMap();
+
+        // We don't want to load the TM or TMW font files ourselves:
+        ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
+        ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
+        ThdlOptions.setUserPreference("thdl.debug", true);
+    }
+
+    public ConverterTest() { }
+
+    /** U+0F40 alone should give "ka"; arguments are (expected, actual). */
+    public void testUnicodeToEwts() {
+        assertEquals("ka", Converter.convertToEwts("\u0f40", null));
+    }
+}
diff --git a/source/org/thdl/tib/text/reverter/UnicodeToTranslitForXslt.java b/source/org/thdl/tib/text/reverter/UnicodeToTranslitForXslt.java
new file mode 100644
index 0000000..2fceaed
--- /dev/null
+++ b/source/org/thdl/tib/text/reverter/UnicodeToTranslitForXslt.java
@@ -0,0 +1,42 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific terms governing rights and limitations under the
+License.
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
+All Rights Reserved.
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.reverter;
+
+/** A class for use in XSL transformations that converts Unicode to
+ * EWTS or ACIP transliteration. Note that the syntax for calling
+ * Java extensions from XSL is vendor-specific; for more details,
+ * please consult the documentation for the XSLT processor you use,
+ * for example Saxon or Xalan-Java.
+ * @author David Chandler
+ */
+public class UnicodeToTranslitForXslt {
+ /** Not instantiable; the private constructor throws if ever called. */
+ private UnicodeToTranslitForXslt() {
+ throw new Error("There's no point in instantiating this class.");
+ }
+
+ /** Converts Tibetan Unicode to EWTS via Converter.convertToEwts (null errors buffer). */
+ public static String unicodeToEwts(String unicode) {
+ return Converter.convertToEwts(unicode, null);
+ }
+ /** Converts Tibetan Unicode to ACIP transliteration. Not implemented yet: always throws Error. */
+ public static String unicodeToAcip(String unicode) {
+ throw new Error("DLC: not yet");
+ }
+}
diff --git a/source/org/thdl/tib/text/reverter/UnicodeToTranslitForXsltTest.java b/source/org/thdl/tib/text/reverter/UnicodeToTranslitForXsltTest.java
new file mode 100644
index 0000000..9012b49
--- /dev/null
+++ b/source/org/thdl/tib/text/reverter/UnicodeToTranslitForXsltTest.java
@@ -0,0 +1,61 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific terms governing rights and limitations under the
+License.
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
+All Rights Reserved.
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.reverter;
+
+import junit.framework.TestCase;
+
+import org.thdl.util.ThdlOptions;
+import org.thdl.tib.text.ttt.ErrorsAndWarnings;
+
+/** Tests the UnicodeToTranslitForXslt class.
+ * @author David Chandler */
+public class UnicodeToTranslitForXsltTest extends TestCase {
+    /** Invokes a text UI and runs all this class's tests. */
+    public static void main(String[] args) {
+        junit.textui.TestRunner.run(UnicodeToTranslitForXsltTest.class);
+    }
+
+    /** Sets the ThdlOptions preferences that these tests run under. */
+    protected void setUp() {
+        // We don't want to use options.txt:
+        ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
+        ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
+        ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.507", "Most");
+        ErrorsAndWarnings.setupSeverityMap();
+
+        // We don't want to load the TM or TMW font files ourselves:
+        ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
+        ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
+        ThdlOptions.setUserPreference("thdl.debug", true);
+    }
+
+    public UnicodeToTranslitForXsltTest() { }
+
+    /** Checks unicodeToEwts output; arguments are (expected, actual). */
+    public void testUnicodeToEwts() {
+        assertEquals("ka", UnicodeToTranslitForXslt.unicodeToEwts("\u0f40"));
+        assertEquals("brtags ", UnicodeToTranslitForXslt.unicodeToEwts("\u0f56\u0f62\u0f4f\u0f42\u0f66\u0f0b"));
+    }
+
+    /** Checks unicodeToAcip (not unicodeToEwts) against upper-case ACIP. */
+    public void testUnicodeToAcip() {
+        assertEquals("KA", UnicodeToTranslitForXslt.unicodeToAcip("\u0f40"));
+        assertEquals("BRTAGS ", UnicodeToTranslitForXslt.unicodeToAcip("\u0f56\u0f62\u0f4f\u0f42\u0f66\u0f0b"));
+    }
+}