diff --git a/build.xml b/build.xml
index 19f5baf..f58524c 100644
--- a/build.xml
+++ b/build.xml
@@ -364,11 +364,18 @@ the jvm starting tomcat:
+
+
+
+
+
+
@@ -472,6 +479,11 @@ the jvm starting tomcat:
description="compiles all JUnit test cases that can be compiled in the present CLASSPATH (NB that this distinction is just wishful thinking for now because we have such weak test coverage at this point)" >
+
+
+
+
+
diff --git a/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXslt.java b/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXslt.java
new file mode 100644
index 0000000..4f0d081
--- /dev/null
+++ b/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXslt.java
@@ -0,0 +1,46 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific terms governing rights and limitations under the
+License.
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
+All Rights Reserved.
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+/** A class for use in XSL transformations that converts EWTS
+ * transliteration to Unicode. This is intended to be used by Xalan
+ * XSLT to convert an XML document that uses Wylie into
+ * HTML/text/whatever that uses Unicode (probably TibetanMachineUni
+ * font).
+ * @author David Chandler
+ */
+public class EwtsToUnicodeForXslt {
+    /** Never instantiated; this class only offers static methods. */
+    private EwtsToUnicodeForXslt() {
+        throw new Error("There's no point in instantiating this class.");
+    }
+
+    /** Returns the Tibetan Unicode for the EWTS transliteration ewts,
+     *  as computed by TConverter.convertToUnicodeText with EWTS traits.
+     *  Errors can show up in the result as "[#ERROR ...]" text.
+     *  TODO(dchandler): must we worry about the encoding, UTF-8
+     *  vs. UTF-16LE e.g.? */
+    public static String convertEwtsTo(String ewts) {
+        // Error messages land in this buffer, which is never read again.
+        StringBuffer discardedErrors = new StringBuffer();
+        return TConverter.convertToUnicodeText(
+            EWTSTraits.instance(), ewts, discardedErrors,
+            null, false, "None", false);
+    }
+}
diff --git a/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXsltTest.java b/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXsltTest.java
new file mode 100644
index 0000000..f62709c
--- /dev/null
+++ b/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXsltTest.java
@@ -0,0 +1,79 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific terms governing rights and limitations under the
+License.
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
+All Rights Reserved.
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import junit.framework.TestCase;
+
+import org.thdl.util.ThdlOptions;
+
+/** Tests EwtsToUnicodeForXslt at the unit level. For such a class, a
+ * much more important test is one that actually uses XSLT.
+ * TODO(dchandler): write such a test. You may even be able to use
+ * JUnit for it.
+ *
+ * @author David Chandler */
+public class EwtsToUnicodeForXsltTest extends TestCase {
+
+    /** Runs all of this class's tests through JUnit's text UI. */
+    public static void main(String[] args) {
+        junit.textui.TestRunner.run(EwtsToUnicodeForXsltTest.class);
+    }
+
+    /** Configures ThdlOptions and the severity map for the tests. */
+    protected void setUp() {
+        // We don't want to use options.txt:
+        ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
+
+        ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
+        ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.507", "Most");
+        ErrorsAndWarnings.setupSeverityMap();
+
+        // Rely on system TM/TMW fonts (we don't want to load the font
+        // files ourselves) and turn on thdl.debug:
+        String[] enabled = { "thdl.rely.on.system.tmw.fonts",
+                             "thdl.rely.on.system.tm.fonts",
+                             "thdl.debug" };
+        for (int i = 0; i < enabled.length; i++) {
+            ThdlOptions.setUserPreference(enabled[i], true);
+        }
+    }
+
+    public EwtsToUnicodeForXsltTest() { }
+
+    /** Fails unless converting ewts gives exactly expected. */
+    private static void assertConverts(String ewts, String expected) {
+        assertEquals(expected, EwtsToUnicodeForXslt.convertEwtsTo(ewts));
+    }
+
+    public void testIt() throws java.io.IOException {
+        assertConverts("ga", "\u0f42");
+        assertConverts("\u0f00\u0f01\u0f02 \u0f03 \u0fcf",
+                       "\u0f00\u0f01\u0f02\u0f0b\u0f03\u0f0b" + "\u0fcf");
+// TODO(dchandler): I think EWTS->Tibetan ought to not give errors
+// about the disambiguators here:
+//        assertConverts("\u0f00.\u0f01.\u0f02 \u0f03 \u0fcf",
+//                       "\u0f00\u0f01\u0f02\u0f0b\u0f03\u0f0b" + "\u0fcf");
+        assertConverts("k+Shu+A+i+o+eHM",
+                       "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e" + "\u0f7f");
+        assertConverts(" . ",
+                       "\u0f0b[#ERROR 130: The tsheg bar (\"syllable\") {.} is"
+                       + " essentially nothing.]\u0f0b");
+    }
+}
+
diff --git a/source/org/thdl/tib/text/ttt/TConverter.java b/source/org/thdl/tib/text/ttt/TConverter.java
index e77640f..b6bc98c 100644
--- a/source/org/thdl/tib/text/ttt/TConverter.java
+++ b/source/org/thdl/tib/text/ttt/TConverter.java
@@ -196,18 +196,18 @@ public class TConverter {
loc[0] == tdoc.getLength());
}
- /** Returns UTF-8 encoded Unicode. A bit indirect, so use this
- * for testing only if performance is a concern. If errors occur
- * in scanning the transliteration or in converting a tsheg bar,
- * then they are appended to errors if errors is non-null, as
- * well as written to the result. If warnings occur in scanning
- * the transliteration or in converting a tsheg bar, then they
- * are appended to warnings if warnings is non-null, and they are
- * written to the result if writeWarningsToResult is true. Error
- * and warning messages are long and self-contained unless
- * shortMessages is true. Returns the conversion upon perfect
- * success or if there were merely warnings, null if errors
- * occurred. */
+ /** Returns the Unicode that the given translit corresponds to. A
+ * bit indirect, so use this for testing only if performance is a
+ * concern. If errors occur in scanning the transliteration or
+ * in converting a tsheg bar, then they are appended to errors if
+ * errors is non-null, as well as written to the result. If
+ * warnings occur in scanning the transliteration or in
+ * converting a tsheg bar, then they are appended to warnings if
+ * warnings is non-null, and they are written to the result if
+ * writeWarningsToResult is true. Error and warning messages are
+ * long and self-contained unless shortMessages is true. Returns
+ * the conversion upon perfect success or if there were merely
+ * warnings, null if errors occurred. */
public static String convertToUnicodeText(TTraits ttraits,
String translit,
StringBuffer errors,
@@ -229,6 +229,9 @@ public class TConverter {
return null;
}
} catch (IOException e) {
+ // Won't happen. UTF-8 is guaranteed to be a supported
+ // encoding, and a ByteArrayOutputStream should not, I
+ // believe, run into I/O problems of its own.
throw new Error(e.toString());
}
}