Added a class for performing EWTS->Unicode conversions during XSLT transformations.

I haven't actually used it with Xalan XSLT yet, but it ought to work if TibetanHTML did (which it must have at one point). I do have a unit test, but an end-to-end test with Xalan is what we still need.
2005-07-13 07:25:18 +00:00 · 2005-07-13 07:25:18 +00:00 · dc18165992
commit dc18165992
parent 6260c0889d
5 changed files with 153 additions and 12 deletions
--- a/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXslt.java
+++ b/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXslt.java
@ -0,0 +1,46 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+/** A class for use in XSL transformations that converts EWTS
+ *  transliteration to Unicode.  This is intended to be used by Xalan
+ *  XSLT to convert an XML document that uses Wylie into
+ *  HTML/text/whatever that uses Unicode (probably TibetanMachineUni
+ *  font).  The class only holds static methods: its sole constructor
+ *  is private and throws, and the one public entry point is
+ *  {@link #convertEwtsTo(String)}.
+ *  @author David Chandler
+ */
+public class EwtsToUnicodeForXslt {
+    /** Static methods provide all the fun!  Private so that outside
+     *  code cannot instantiate this class; it throws an Error if it
+     *  is ever invoked anyway. */
+    private EwtsToUnicodeForXslt() {
+        throw new Error("There's no point in instantiating this class.");
+    }
+
+    /** Converts EWTS transliteration into Tibetan Unicode by
+     *  delegating to TConverter.convertToUnicodeText, passing
+     *  EWTSTraits.instance() as the traits, ewts as the
+     *  transliteration, and a fresh StringBuffer as the errors
+     *  buffer.  That buffer is never read afterwards, so errors are
+     *  visible to the caller only through the returned value.
+     *  NOTE(review): the remaining arguments (null, false, "None",
+     *  false) presumably are the warnings buffer,
+     *  writeWarningsToResult, a warning level, and shortMessages --
+     *  the callee's full signature is not visible here; verify.
+     *  NOTE(review): the callee's doc says it returns null if errors
+     *  occurred, yet the unit test expects inline error text for
+     *  some inputs; confirm which holds before relying on non-null.
+     *  TODO(dchandler): must we worry about the encoding, UTF-8
+     *  vs. UTF-16LE e.g.?
+     *  @param ewts the EWTS transliteration to convert
+     *  @return whatever TConverter.convertToUnicodeText returns for
+     *  the given input */
+    public static String convertEwtsTo(String ewts) {
+        return TConverter.convertToUnicodeText(EWTSTraits.instance(),
+                                               ewts,
+                                               new StringBuffer(),
+                                               null,
+                                               false,
+                                               "None",
+                                               false);
+    }
+}
--- a/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXsltTest.java
+++ b/source/org/thdl/tib/text/ttt/EwtsToUnicodeForXsltTest.java
@ -0,0 +1,79 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import junit.framework.TestCase;
+
+import org.thdl.util.ThdlOptions;
+
+/** Tests EwtsToUnicodeForXslt at the unit level.  For such a class, a
+ *  much more important test is one that actually uses XSLT.
+ *  TODO(dchandler): write such a test.  You may even be able to use
+ *  JUnit for it.
+ *
+ *  <p>setUp configures ThdlOptions without the default options file
+ *  before each test.  Every case goes through the private help
+ *  method, which feeds a transliteration to
+ *  EwtsToUnicodeForXslt.convertEwtsTo and asserts that the returned
+ *  string equals the expected one.
+ *
+ *  @author David Chandler */
+public class EwtsToUnicodeForXsltTest extends TestCase {
+
+    /** Invokes a text UI and runs all this class's tests.
+     *  @param args command-line arguments; not used */
+    public static void main(String[] args) {
+        junit.textui.TestRunner.run(EwtsToUnicodeForXsltTest.class);
+    }
+
+    protected void setUp() {
+        // We don't want to use options.txt:
+        ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
+
+        ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
+        ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.507", "Most");
+        ErrorsAndWarnings.setupSeverityMap();  // presumably picks up the severity preferences set just above -- verify
+
+        // We don't want to load the TM or TMW font files ourselves:
+        ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
+        ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
+        ThdlOptions.setUserPreference("thdl.debug", true);  // unrelated to fonts: also sets the thdl.debug preference
+    }
+
+
+    public EwtsToUnicodeForXsltTest() { }
+
+    private static void help(String ewts, String expected) {  // asserts convertEwtsTo(ewts) equals expected
+        String actual = EwtsToUnicodeForXslt.convertEwtsTo(ewts);
+        assertEquals(expected, actual);
+    }
+
+    public void testIt() throws java.io.IOException {
+        help("ga",  // a lone consonant maps to U+0F42
+             "\u0f42");
+        help("\u0f00\u0f01\u0f02 \u0f03 \u0fcf",  // Tibetan code points in the input come back unchanged; each space becomes U+0F0B
+             "\u0f00\u0f01\u0f02\u0f0b\u0f03\u0f0b"
+             + "\u0fcf");
+// TODO(dchandler): I think EWTS->Tibetan ought to not give errors
+// about the disambiguators here:
+//         help("\u0f00.\u0f01.\u0f02 \u0f03 \u0fcf",
+//              "\u0f00\u0f01\u0f02\u0f0b\u0f03\u0f0b"
+//              + "\u0fcf");
+        help("k+Shu+A+i+o+eHM",  // one stack carrying several vowel signs plus trailing marks
+             "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e"
+             + "\u0f7f");
+        help(" . ",  // a bare disambiguator: the error text is embedded in the result between two U+0F0B
+             "\u0f0b[#ERROR 130: The tsheg bar (\"syllable\") {.} is"
+             + " essentially nothing.]\u0f0b");
+    }
+}
+
--- a/source/org/thdl/tib/text/ttt/TConverter.java
+++ b/source/org/thdl/tib/text/ttt/TConverter.java
@ -196,18 +196,18 @@ public class TConverter {
                         loc[0] == tdoc.getLength());
    }

-    /** Returns UTF-8 encoded Unicode.  A bit indirect, so use this
-     *  for testing only if performance is a concern.  If errors occur
-     *  in scanning the transliteration or in converting a tsheg bar,
-     *  then they are appended to errors if errors is non-null, as
-     *  well as written to the result.  If warnings occur in scanning
-     *  the transliteration or in converting a tsheg bar, then they
-     *  are appended to warnings if warnings is non-null, and they are
-     *  written to the result if writeWarningsToResult is true.  Error
-     *  and warning messages are long and self-contained unless
-     *  shortMessages is true.  Returns the conversion upon perfect
-     *  success or if there were merely warnings, null if errors
-     *  occurred.  */
+    /** Returns the Unicode that the given translit corresponds to.  A
+     *  bit indirect, so if performance is a concern, use this for
+     *  testing only.  If errors occur in scanning the transliteration
+     *  or in converting a tsheg bar, then they are appended to errors
+     *  if errors is non-null, as well as written to the result.  If
+     *  warnings occur in scanning the transliteration or in
+     *  converting a tsheg bar, then they are appended to warnings if
+     *  warnings is non-null, and they are written to the result if
+     *  writeWarningsToResult is true.  Error and warning messages are
+     *  long and self-contained unless shortMessages is true.  Returns
+     *  the conversion upon perfect success or if there were merely
+     *  warnings, null if errors occurred. */
    public static String convertToUnicodeText(TTraits ttraits,
                                              String translit,
                                              StringBuffer errors,
@ -229,6 +229,9 @@ public class TConverter {
                return null;
            }
        } catch (IOException e) {
+            // Won't happen.  UTF-8 is guaranteed to be a supported
+            // encoding, and I don't think a ByteArrayOutputStream
+            // can fail this way either.
            throw new Error(e.toString());
        }
    }