Added a Unicode->ASCII dump for debugging *->Unicode conversions. To use it, use 'java -cp Jskad.jar org.thdl.util.VerboseUnicodeDump'.

This commit is contained in:
dchandler 2004-01-17 16:52:38 +00:00
parent 9dd95c5524
commit 6fdb2a26bb
2 changed files with 57 additions and 0 deletions

View file

@ -320,6 +320,13 @@ Contributor(s): ______________________________________.
<param name="my.included.source.file"
value="org/thdl/tib/text/TibetanHTML.java"/>
</antcall>
<!-- Put org.thdl.util.VerboseUnicodeDump in Jskad's jar for those who
want to use it. -->
<antcall target="our-internal-javac-task">
<param name="mybin" value="${jskadbin}"/>
<param name="my.included.source.file"
value="org/thdl/util/VerboseUnicodeDump.java"/>
</antcall>
<!-- Put TibetanConverter and ACIPConverter in Jskad's jar for
those who want to use them. -->
<antcall target="our-internal-javac-task">

View file

@ -0,0 +1,50 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.util;
/** <p>VerboseUnicodeDump is a command-line utility that reads a text
    file in a caller-specified character encoding and prints one line
    of human-readable text per character read.  The output looks like
    the following:</p>
    <pre>
    0f40
    0f0d
    0020
    </pre>
    <p>One might use this to debug ACIP-&gt;Unicode conversions, for
    example.</p>
    <p>Characters are read one <code>char</code> at a time, so each
    line of output corresponds to a single UTF-16 code unit.</p>
    @author David Chandler */
public class VerboseUnicodeDump {
    /** Dumps the file named by <code>args[0]</code>, decoded using the
     *  charset named by <code>args[1]</code>, to standard output.
     *  Exits with status 1 (after printing a usage message to
     *  standard error) unless exactly two arguments are given, and
     *  with status 0 once the whole file has been dumped.
     *  @param args the file name followed by the charset name, e.g.
     *  "UTF-8" or "UTF-16LE"
     *  @throws Exception if the charset name is not accepted by
     *  {@link java.nio.charset.Charset#forName(String)}, or if the
     *  file cannot be opened, read, or closed */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("bad args, need filename UTF-8|UTF-16LE|UTF-16|UTF-16BE|US-ASCII|...");
            System.exit(1);
        }
        // Resolve the charset before opening the file so that a bad
        // charset name cannot leave an open, unreferenced stream behind.
        java.nio.charset.Charset charset
            = java.nio.charset.Charset.forName(args[1]);
        // Buffer the reader: we pull one char per read() call below.
        java.io.Reader in
            = new java.io.BufferedReader(
                  new java.io.InputStreamReader(new java.io.FileInputStream(args[0]),
                                                charset));
        try {
            int codeUnit;
            while (-1 != (codeUnit = in.read())) {
                System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)codeUnit, false, ""));
            }
        } finally {
            // Release the file handle even if reading or formatting fails.
            in.close();
        }
        System.exit(0);
    }
}