Added a Unicode->ASCII dump for debugging *->Unicode conversions. To run it, use 'java -cp Jskad.jar org.thdl.util.VerboseUnicodeDump'.

2004-01-17 16:52:38 +00:00 · 2004-01-17 16:52:38 +00:00 · 6fdb2a26bb
commit 6fdb2a26bb
parent 9dd95c5524
2 changed files with 57 additions and 0 deletions
--- a/build.xml
+++ b/build.xml
@ -320,6 +320,13 @@ Contributor(s): ______________________________________.
      <param name="my.included.source.file"
             value="org/thdl/tib/text/TibetanHTML.java"/>
    </antcall>
+    <!-- Put org.thdl.util.VerboseUnicodeDump in Jskad's jar for those
+         who want to use it. -->
+    <antcall target="our-internal-javac-task">
+      <param name="mybin" value="${jskadbin}"/>
+      <param name="my.included.source.file"
+             value="org/thdl/util/VerboseUnicodeDump.java"/>
+    </antcall>
    <!-- Put TibetanConverter and ACIPConverter in Jskad's jar for
         those who want to use them. -->
    <antcall target="our-internal-javac-task">
--- a/source/org/thdl/util/VerboseUnicodeDump.java
+++ b/source/org/thdl/util/VerboseUnicodeDump.java
@ -0,0 +1,50 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.util;
+
+/** <p>VerboseUnicodeDump is a utility for reading in a Unicode text
+    file and outputting human-readable stuff. This stuff is like the
+    following:</p>
+
+<pre>
+0f40
+0f0d
+0020
+</pre>
+
+    <p>One might use this to debug ACIP-&gt;Unicode conversions, for
+    example.</p>
+
+    @author David Chandler */
+public class VerboseUnicodeDump {
+    /** Dumps a text file to standard output, one line per character.
+
+        <p>The file named by <code>args[0]</code> is decoded using the
+        charset named by <code>args[1]</code>.  Each value returned by
+        <code>java.io.Reader.read()</code> (a single UTF-16 code unit,
+        so a character outside the Basic Multilingual Plane arrives
+        as two surrogates) is handed to
+        <code>UnicodeUtils.unicodeCodepointToString</code>, and the
+        resulting string is printed on its own line.</p>
+
+        <p>The JVM exits with status 0 once the whole file has been
+        dumped, and with status 1 if the number of arguments is wrong
+        or the charset name is illegal or unsupported.</p>
+
+        @param args the file name followed by the charset name
+        @throws Exception if the file cannot be opened or read; such
+        failures are not caught here */
+    public static void main(String args[]) throws Exception {
+        if (args.length != 2) {
+            System.err.println("bad args, need filename UTF-8|UTF-16LE|UTF-16|UTF-16BE|US-ASCII|...");
+            System.exit(1);
+        }
+
+        // Resolve the charset before opening the file so that a bad
+        // charset name neither leaves an open file handle behind nor
+        // ends in a raw stack trace.
+        java.nio.charset.Charset cs = null;
+        try {
+            cs = java.nio.charset.Charset.forName(args[1]);
+        } catch (IllegalArgumentException e) {
+            // Covers both IllegalCharsetNameException and
+            // UnsupportedCharsetException.
+            System.err.println("bad charset '" + args[1]
+                               + "', need UTF-8|UTF-16LE|UTF-16|UTF-16BE|US-ASCII|...");
+            System.exit(1);
+        }
+
+        // Buffer the decoder because we pull one char at a time.
+        java.io.Reader fr
+            = new java.io.BufferedReader(
+                new java.io.InputStreamReader(new java.io.FileInputStream(args[0]),
+                                              cs));
+        try {
+            int x;
+            while (-1 != (x = fr.read())) {
+                System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, ""));
+            }
+        } finally {
+            // Release the file even when reading or formatting fails.
+            fr.close();
+        }
+
+        // Kept outside the try block: System.exit would otherwise
+        // terminate the JVM before the finally clause could run.
+        System.exit(0);
+    }
+}