Added a class for performing EWTS->Unicode conversions during XSLT

transformations.  I haven't actually used it with Xalan XSLT yet, but
it ought to work if TibetanHTML did (which it must have at one point).

I do have a unit test, but an end-to-end test with Xalan is what we
need.
This commit is contained in:
dchandler 2005-07-13 07:25:18 +00:00
parent 6260c0889d
commit dc18165992
5 changed files with 153 additions and 12 deletions

View file

@ -364,11 +364,18 @@ the jvm starting tomcat:
<param name="my.included.source.file"
value="org/thdl/tib/input/Jskad.java"/>
</antcall>
<!-- For XSLT, we want this in jskad.jar: -->
<antcall target="our-internal-javac-task">
<param name="mybin" value="${jskadbin}"/>
<param name="my.included.source.file"
value="org/thdl/tib/text/TibetanHTML.java"/>
</antcall>
<!-- For XSLT, we want this in jskad.jar: -->
<antcall target="our-internal-javac-task">
<param name="mybin" value="${jskadbin}"/>
<param name="my.included.source.file"
value="org/thdl/tib/text/ttt/EwtsToUnicodeForXslt.java"/>
</antcall>
<!-- Put org.thdl.VerboseUnicodeDump in Jskad's jar for those who
want to use it. -->
<antcall target="our-internal-javac-task">
@ -472,6 +479,11 @@ the jvm starting tomcat:
description="compiles all JUnit test cases that can be compiled in the present CLASSPATH (NB that this distinction is just wishful thinking for now because we have such weak test coverage at this point)" >
<mkdir dir="${junitbin}"/>
<antcall target="create-timestamp-source-code"/> <!-- DLC NOW! The -run targets are mucking with this! It isn't fatal, but it should be fixed. -->
<antcall target="our-internal-javac-task">
<param name="mybin" value="${junitbin}"/>
<param name="my.included.source.file"
value="org/thdl/tib/text/ttt/EwtsToUnicodeForXsltTest.java"/>
</antcall>
<antcall target="our-internal-javac-task">
<param name="mybin" value="${junitbin}"/>
<param name="my.included.source.file"

View file

@ -73,6 +73,7 @@
<formatter type="xml"/><!-- If not XML, then 'ant -buildfile
build.xml check-report' will fail. -->
<sysproperty key="java.awt.headless" value="true"/>
<test name="org.thdl.tib.text.ttt.EwtsToUnicodeForXsltTest"/>
<test name="org.thdl.tib.text.ttt.EWTSTest"/>
<test name="org.thdl.tib.text.ttt.EWTStibwniniTest"/>
<test name="org.thdl.tib.input.TMW_RTF_TO_THDL_WYLIETest"/>

View file

@ -0,0 +1,46 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text.ttt;
/**
 * Static helper meant to be called from XSL transformations (the
 * intended host is Xalan XSLT) in order to turn EWTS transliteration
 * into Tibetan Unicode, e.g. when converting an XML document that
 * uses Wylie into HTML/text/whatever that uses Unicode (probably
 * displayed with the TibetanMachineUni font).
 *
 * <p>The class carries no state and exposes only static methods.
 *
 * @author David Chandler
 */
public class EwtsToUnicodeForXslt {

    /**
     * Never meant to be called: all functionality is static.  Any
     * attempt to instantiate this class fails with an {@link Error}.
     */
    private EwtsToUnicodeForXslt() {
        throw new Error("There's no point in instantiating this class.");
    }

    /**
     * Converts EWTS transliteration into Tibetan Unicode by
     * delegating to {@code TConverter.convertToUnicodeText} with the
     * EWTS traits.
     *
     * <p>The error buffer handed to the converter is thrown away
     * afterwards, so callers only ever see the returned text.
     * NOTE(review): the remaining positional arguments (null, false,
     * "None", false) presumably mean "no warnings buffer, do not
     * write warnings to the result, warning level None, long
     * messages" -- confirm against TConverter's full signature.
     * NOTE(review): TConverter's documentation mentions a null
     * return when errors occur -- confirm whether callers must
     * handle null.
     *
     * <p>TODO(dchandler): must we worry about the encoding, UTF-8
     * vs. UTF-16LE e.g.?
     *
     * @param ewts the EWTS transliteration to convert
     * @return whatever the converter produced for {@code ewts}
     */
    public static String convertEwtsTo(String ewts) {
        // Collected only because the converter accepts a buffer;
        // nothing reads it after the call returns.
        final StringBuffer discardedErrors = new StringBuffer();
        return TConverter.convertToUnicodeText(
                EWTSTraits.instance(), ewts, discardedErrors,
                null, false, "None", false);
    }
}

View file

@ -0,0 +1,79 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text.ttt;
import junit.framework.TestCase;
import org.thdl.util.ThdlOptions;
/**
 * Unit-level checks for EwtsToUnicodeForXslt.  A far more valuable
 * test for such a class would drive it through a real XSLT
 * transformation.
 * TODO(dchandler): write such a test.  You may even be able to use
 * JUnit for it.
 *
 * @author David Chandler
 */
public class EwtsToUnicodeForXsltTest extends TestCase {

    public EwtsToUnicodeForXsltTest() { }

    /** Runs every test in this class through JUnit's text UI. */
    public static void main(String[] args) {
        junit.textui.TestRunner.run(EwtsToUnicodeForXsltTest.class);
    }

    /**
     * Prepares THDL options for a test run.  The statement order is
     * kept deliberately: the severity map is set up only after the
     * severity-related preferences have been stored.
     */
    protected void setUp() {
        // Keep options.txt out of the picture:
        ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();

        ThdlOptions.setUserPreference(
                "thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults",
                "true");
        ThdlOptions.setUserPreference(
                "thdl.acip.to.tibetan.warning.severity.507",
                "Most");
        ErrorsAndWarnings.setupSeverityMap();

        // Leave loading of the TM and TMW font files to the system:
        ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
        ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);

        ThdlOptions.setUserPreference("thdl.debug", true);
    }

    /**
     * Asserts that converting <code>ewts</code> yields exactly
     * <code>expected</code>.
     */
    private static void assertConvertsTo(String ewts, String expected) {
        assertEquals(expected, EwtsToUnicodeForXslt.convertEwtsTo(ewts));
    }

    /**
     * Walks a table of { input, expected output } pairs in order,
     * stopping at the first mismatch.
     */
    public void testIt() throws java.io.IOException {
        final String[][] cases = {
            { "ga",
              "\u0f42" },
            { "\u0f00\u0f01\u0f02 \u0f03 \u0fcf",
              "\u0f00\u0f01\u0f02\u0f0b\u0f03\u0f0b"
              + "\u0fcf" },
            // TODO(dchandler): I think EWTS->Tibetan ought to not give errors
            // about the disambiguators here:
            // { "\u0f00.\u0f01.\u0f02 \u0f03 \u0fcf",
            //   "\u0f00\u0f01\u0f02\u0f0b\u0f03\u0f0b"
            //   + "\u0fcf" },
            { "k+Shu+A+i+o+eHM",
              "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e"
              + "\u0f7f" },
            { " . ",
              "\u0f0b[#ERROR 130: The tsheg bar (\"syllable\") {.} is"
              + " essentially nothing.]\u0f0b" },
        };
        for (int i = 0; i < cases.length; i++) {
            assertConvertsTo(cases[i][0], cases[i][1]);
        }
    }
}

View file

@ -196,18 +196,18 @@ public class TConverter {
loc[0] == tdoc.getLength());
}
/** Returns UTF-8 encoded Unicode. A bit indirect, so use this
* for testing only if performance is a concern. If errors occur
* in scanning the transliteration or in converting a tsheg bar,
* then they are appended to errors if errors is non-null, as
* well as written to the result. If warnings occur in scanning
* the transliteration or in converting a tsheg bar, then they
* are appended to warnings if warnings is non-null, and they are
* written to the result if writeWarningsToResult is true. Error
* and warning messages are long and self-contained unless
* shortMessages is true. Returns the conversion upon perfect
* success or if there were merely warnings, null if errors
* occurred. */
/** Returns the Unicode that the given translit corresponds to. A
* bit indirect, so use this for testing only if performance is a
* concern. If errors occur in scanning the transliteration or
* in converting a tsheg bar, then they are appended to errors if
* errors is non-null, as well as written to the result. If
* warnings occur in scanning the transliteration or in
* converting a tsheg bar, then they are appended to warnings if
* warnings is non-null, and they are written to the result if
* writeWarningsToResult is true. Error and warning messages are
* long and self-contained unless shortMessages is true. Returns
* the conversion upon perfect success or if there were merely
* warnings, null if errors occurred. */
public static String convertToUnicodeText(TTraits ttraits,
String translit,
StringBuffer errors,
@ -229,6 +229,9 @@ public class TConverter {
return null;
}
} catch (IOException e) {
// Should not happen: UTF-8 is guaranteed to be a supported
// encoding, and, as far as I know, ByteArrayOutputStreams
// do not run into such problems either.
throw new Error(e.toString());
}
}