Added a Unicode->ASCII dump for debugging *->Unicode conversions. To use it, use 'java -cp Jskad.jar org.thdl.util.VerboseUnicodeDump'.
This commit is contained in:
parent
9dd95c5524
commit
6fdb2a26bb
2 changed files with 57 additions and 0 deletions
|
@ -320,6 +320,13 @@ Contributor(s): ______________________________________.
|
|||
<param name="my.included.source.file"
|
||||
value="org/thdl/tib/text/TibetanHTML.java"/>
|
||||
</antcall>
|
||||
<!-- Put org.thdl.VerboseUnicodeDump in Jskad's jar for those who
|
||||
want to use it. -->
|
||||
<antcall target="our-internal-javac-task">
|
||||
<param name="mybin" value="${jskadbin}"/>
|
||||
<param name="my.included.source.file"
|
||||
value="org/thdl/util/VerboseUnicodeDump.java"/>
|
||||
</antcall>
|
||||
<!-- Put TibetanConverter and ACIPConverter in Jskad's jar for
|
||||
those who want to use them. -->
|
||||
<antcall target="our-internal-javac-task">
|
||||
|
|
50
source/org/thdl/util/VerboseUnicodeDump.java
Normal file
50
source/org/thdl/util/VerboseUnicodeDump.java
Normal file
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.util;
|
||||
|
||||
/** <p>VerboseUnicodeDump is a utility for reading in a Unicode text
|
||||
file and outputting human-readable stuff. This stuff is like the
|
||||
following:</p>
|
||||
|
||||
<pre>
|
||||
0f40
|
||||
0f0d
|
||||
0020
|
||||
</pre>
|
||||
|
||||
<p>One might use this to debug ACIP->Unicode conversions, for
|
||||
example.</p>
|
||||
|
||||
@author David Chandler */
|
||||
public class VerboseUnicodeDump {
|
||||
public static void main(String args[]) throws Exception {
|
||||
if (args.length != 2) {
|
||||
System.err.println("bad args, need filename UTF-8|UTF-16LE|UTF-16|UTF-16BE|US-ASCII|...");
|
||||
System.exit(1);
|
||||
}
|
||||
java.io.Reader fr
|
||||
= new java.io.InputStreamReader(new java.io.FileInputStream(args[0]),
|
||||
java.nio.charset.Charset.forName(args[1]));
|
||||
int x;
|
||||
while (-1 != (x = fr.read())) {
|
||||
System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, ""));
|
||||
}
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue