2004-01-17 16:52:38 +00:00
|
|
|
/*
|
|
|
|
The contents of this file are subject to the THDL Open Community License
|
|
|
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
|
|
|
with the License. You may obtain a copy of the License on the THDL web site
|
|
|
|
(http://www.thdl.org/).
|
|
|
|
|
|
|
|
Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
|
|
License for the specific terms governing rights and limitations under the
|
|
|
|
License.
|
|
|
|
|
|
|
|
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
|
|
|
Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
|
|
|
|
All Rights Reserved.
|
|
|
|
|
|
|
|
Contributor(s): ______________________________________.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package org.thdl.util;
|
|
|
|
|
|
|
|
/** <p>VerboseUnicodeDump is a utility for reading in a Unicode text
|
|
|
|
file and outputting human-readable stuff. This stuff is like the
|
|
|
|
following:</p>
|
|
|
|
|
|
|
|
<pre>
|
|
|
|
0f40
|
|
|
|
0f0d
|
|
|
|
0020
|
|
|
|
</pre>
|
|
|
|
|
|
|
|
<p>One might use this to debug ACIP->Unicode conversions, for
|
|
|
|
example.</p>
|
|
|
|
|
|
|
|
@author David Chandler */
|
|
|
|
public class VerboseUnicodeDump {
|
|
|
|
public static void main(String args[]) throws Exception {
|
|
|
|
if (args.length != 2) {
|
|
|
|
System.err.println("bad args, need filename UTF-8|UTF-16LE|UTF-16|UTF-16BE|US-ASCII|...");
|
|
|
|
System.exit(1);
|
|
|
|
}
|
|
|
|
java.io.Reader fr
|
|
|
|
= new java.io.InputStreamReader(new java.io.FileInputStream(args[0]),
|
|
|
|
java.nio.charset.Charset.forName(args[1]));
|
|
|
|
int x;
|
|
|
|
while (-1 != (x = fr.read())) {
|
Numerous EWTS->Unicode and especially EWTS->TMW improvements.
Fixed ordering of Unicode wowels. [ku+A] gives the correct Unicode
now, e.g.
EWTS->TMW looks better for some wacky wowels like, I'm guessing here, [ku+A].
EWTS->TMW should now give errors any time the full input isn't used.
Previously, wacky wowels like [kai+-i] would lead to some droppage.
EWTS->TMW->Unicode testing is now in effect. This found a ton of
EWTS->TMW bugs, most or all of which are fixed now.
TMW->Unicode is improved/fixed for {
\u5350,\u534D,\u0F88+k,\u0F88+kh,U }. (Why U? "\u0f75" is
discouraged in favor of "\u0f71\u0f74".)
NOTE: TMW_RTF_TO_THDL_WYLIETest is still disabled for the nightly
builds' sake, but I ran it in my sandbox and it passed.
2005-07-11 02:51:06 +00:00
|
|
|
System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, "", false));
|
2004-01-17 16:52:38 +00:00
|
|
|
}
|
|
|
|
System.exit(0);
|
|
|
|
}
|
|
|
|
}
|