From 6fdb2a26bb7d7e5dc8b3664d7397ccb4f0034ea6 Mon Sep 17 00:00:00 2001 From: dchandler Date: Sat, 17 Jan 2004 16:52:38 +0000 Subject: [PATCH] Added a Unicode->ASCII dump for debugging *->Unicode conversions. To use it, use 'java -cp Jskad.jar org.thdl.util.VerboseUnicodeDump'. --- build.xml | 7 +++ source/org/thdl/util/VerboseUnicodeDump.java | 50 ++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 source/org/thdl/util/VerboseUnicodeDump.java diff --git a/build.xml b/build.xml index 26415d3..93eb254 100644 --- a/build.xml +++ b/build.xml @@ -320,6 +320,13 @@ Contributor(s): ______________________________________. + + + + + diff --git a/source/org/thdl/util/VerboseUnicodeDump.java b/source/org/thdl/util/VerboseUnicodeDump.java new file mode 100644 index 0000000..b864a0a --- /dev/null +++ b/source/org/thdl/util/VerboseUnicodeDump.java @@ -0,0 +1,50 @@ +/* +The contents of this file are subject to the THDL Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the THDL web site +(http://www.thdl.org/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is the Tibetan and Himalayan Digital +Library (THDL). Portions created by the THDL are Copyright 2003 THDL. +All Rights Reserved. + +Contributor(s): ______________________________________. +*/ + +package org.thdl.util; + +/**

VerboseUnicodeDump is a utility that reads a text file in a + caller-specified encoding and prints a human-readable line for each + character it decodes. The output looks like the following:

+ +
+0f40
+0f0d
+0020
+
+ +

One might use this to debug ACIP->Unicode conversions, for + example.

+
+    @author David Chandler */
+public class VerboseUnicodeDump {
+    /**
+     * Command-line entry point.  Opens the file named by
+     * <code>args[0]</code>, decodes it with the charset named by
+     * <code>args[1]</code>, and prints one line to standard output for
+     * every char the decoder yields.  Each line is whatever
+     * <code>UnicodeUtils.unicodeCodepointToString</code> returns for
+     * that char.
+     *
+     * <p>Because the input is consumed through
+     * <code>java.io.Reader.read()</code>, the unit of output is a UTF-16
+     * code unit; a character outside the Basic Multilingual Plane is
+     * reported as its two surrogates.</p>
+     *
+     * <p>The process exits with status 1 (after a message on standard
+     * error) when the argument count is wrong, and with status 0 once
+     * the whole file has been dumped.</p>
+     *
+     * @param args exactly two elements: the input file name and the
+     * name of the charset to decode it with (e.g. UTF-8, UTF-16LE)
+     * @throws Exception if the file cannot be opened, the charset name
+     * is illegal or unsupported, or a read fails
+     */
+    public static void main(String args[]) throws Exception {
+        if (args.length != 2) {
+            System.err.println("bad args, need filename UTF-8|UTF-16LE|UTF-16|UTF-16BE|US-ASCII|...");
+            System.exit(1);
+        }
+        // Hold on to the raw stream so the file handle is released even
+        // when the charset lookup or a read throws.
+        java.io.InputStream in = new java.io.FileInputStream(args[0]);
+        try {
+            // We pull one char at a time below, so put a BufferedReader
+            // in front of the decoder instead of hitting it per char.
+            java.io.Reader fr
+                = new java.io.BufferedReader(
+                      new java.io.InputStreamReader(in,
+                                                    java.nio.charset.Charset.forName(args[1])));
+            int x;
+            while (-1 != (x = fr.read())) {
+                // read() returns 0..65535 (or -1 at EOF, handled above),
+                // so narrowing to char loses nothing.
+                // NOTE(review): the meaning of the `false` and `""`
+                // arguments is defined by UnicodeUtils -- confirm there.
+                System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, ""));
+            }
+        } finally {
+            in.close();
+        }
+        System.exit(0);
+    }
+}