Added a class for performing EWTS->Unicode conversions during XSLT
transformations. I haven't actually used it with Xalan XSLT yet, but it ought to work if TibetanHTML did (which it must have at one point). I do have a unit test, but an end-to-end test with Xalan is what we need.
This commit is contained in:
parent
6260c0889d
commit
dc18165992
5 changed files with 153 additions and 12 deletions
46
source/org/thdl/tib/text/ttt/EwtsToUnicodeForXslt.java
Normal file
46
source/org/thdl/tib/text/ttt/EwtsToUnicodeForXslt.java
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text.ttt;
|
||||
|
||||
/** A class for use in XSL transformations that converts EWTS
|
||||
* transliteration to Unicode. This is intended to be used by Xalan
|
||||
* XSLT to convert an XML document that uses Wylie into
|
||||
* HTML/text/whatever that uses Unicode (probably TibetanMachineUni
|
||||
* font).
|
||||
* @author David Chandler
|
||||
*/
|
||||
public class EwtsToUnicodeForXslt {
|
||||
/** Static methods provide all the fun! */
|
||||
private EwtsToUnicodeForXslt() {
|
||||
throw new Error("There's no point in instantiating this class.");
|
||||
}
|
||||
|
||||
/** Converts EWTS transliteration into Tibetan Unicode.
|
||||
* TODO(dchandler): must we worry about the encoding, UTF-8
|
||||
* vs. UTF-16LE e.g.? */
|
||||
public static String convertEwtsTo(String ewts) {
|
||||
return TConverter.convertToUnicodeText(EWTSTraits.instance(),
|
||||
ewts,
|
||||
new StringBuffer(),
|
||||
null,
|
||||
false,
|
||||
"None",
|
||||
false);
|
||||
}
|
||||
}
|
79
source/org/thdl/tib/text/ttt/EwtsToUnicodeForXsltTest.java
Normal file
79
source/org/thdl/tib/text/ttt/EwtsToUnicodeForXsltTest.java
Normal file
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text.ttt;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.thdl.util.ThdlOptions;
|
||||
|
||||
/** Tests EwtsToUnicodeForXslt at the unit level. For such a class, a
|
||||
* much more important test is one that actually uses XSLT.
|
||||
* TODO(dchandler): write such a test. You may even be able to use
|
||||
* JUnit for it.
|
||||
*
|
||||
* @author David Chandler */
|
||||
public class EwtsToUnicodeForXsltTest extends TestCase {
|
||||
|
||||
/** Invokes a text UI and runs all this class's tests. */
|
||||
public static void main(String[] args) {
|
||||
junit.textui.TestRunner.run(EwtsToUnicodeForXsltTest.class);
|
||||
}
|
||||
|
||||
protected void setUp() {
|
||||
// We don't want to use options.txt:
|
||||
ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
|
||||
|
||||
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
|
||||
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.507", "Most");
|
||||
ErrorsAndWarnings.setupSeverityMap();
|
||||
|
||||
// We don't want to load the TM or TMW font files ourselves:
|
||||
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
|
||||
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
|
||||
ThdlOptions.setUserPreference("thdl.debug", true);
|
||||
}
|
||||
|
||||
|
||||
public EwtsToUnicodeForXsltTest() { }
|
||||
|
||||
private static void help(String ewts, String expected) {
|
||||
String actual = EwtsToUnicodeForXslt.convertEwtsTo(ewts);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testIt() throws java.io.IOException {
|
||||
help("ga",
|
||||
"\u0f42");
|
||||
help("\u0f00\u0f01\u0f02 \u0f03 \u0fcf",
|
||||
"\u0f00\u0f01\u0f02\u0f0b\u0f03\u0f0b"
|
||||
+ "\u0fcf");
|
||||
// TODO(dchandler): I think EWTS->Tibetan ought to not give errors
|
||||
// about the disambiguators here:
|
||||
// help("\u0f00.\u0f01.\u0f02 \u0f03 \u0fcf",
|
||||
// "\u0f00\u0f01\u0f02\u0f0b\u0f03\u0f0b"
|
||||
// + "\u0fcf");
|
||||
help("k+Shu+A+i+o+eHM",
|
||||
"\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e"
|
||||
+ "\u0f7f");
|
||||
help(" . ",
|
||||
"\u0f0b[#ERROR 130: The tsheg bar (\"syllable\") {.} is"
|
||||
+ " essentially nothing.]\u0f0b");
|
||||
}
|
||||
}
|
||||
|
|
@ -196,18 +196,18 @@ public class TConverter {
|
|||
loc[0] == tdoc.getLength());
|
||||
}
|
||||
|
||||
/** Returns UTF-8 encoded Unicode. A bit indirect, so use this
|
||||
* for testing only if performance is a concern. If errors occur
|
||||
* in scanning the transliteration or in converting a tsheg bar,
|
||||
* then they are appended to errors if errors is non-null, as
|
||||
* well as written to the result. If warnings occur in scanning
|
||||
* the transliteration or in converting a tsheg bar, then they
|
||||
* are appended to warnings if warnings is non-null, and they are
|
||||
* written to the result if writeWarningsToResult is true. Error
|
||||
* and warning messages are long and self-contained unless
|
||||
* shortMessages is true. Returns the conversion upon perfect
|
||||
* success or if there were merely warnings, null if errors
|
||||
* occurred. */
|
||||
/** Returns the Unicode that the given translit corresponds to. A
|
||||
* bit indirect, so use this for testing only if performance is a
|
||||
* concern. If errors occur in scanning the transliteration or
|
||||
* in converting a tsheg bar, then they are appended to errors if
|
||||
* errors is non-null, as well as written to the result. If
|
||||
* warnings occur in scanning the transliteration or in
|
||||
* converting a tsheg bar, then they are appended to warnings if
|
||||
* warnings is non-null, and they are written to the result if
|
||||
* writeWarningsToResult is true. Error and warning messages are
|
||||
* long and self-contained unless shortMessages is true. Returns
|
||||
* the conversion upon perfect success or if there were merely
|
||||
* warnings, null if errors occurred. */
|
||||
public static String convertToUnicodeText(TTraits ttraits,
|
||||
String translit,
|
||||
StringBuffer errors,
|
||||
|
@ -229,6 +229,9 @@ public class TConverter {
|
|||
return null;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// Won't happen. UTF-8 is guaranteed to be a supported
|
||||
// encoding, and ByteArrayOutputStreams don't have such
|
||||
// problems I don't think.
|
||||
throw new Error(e.toString());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue