ACIP->TMW and ACIP->Unicode have my pre-stamp of non-approval. Except

for {NYAx} and {NYAo}, they're as good as I'll get them without input from experts or the employ of a complementary, syllabary-based approach.
2003-09-04 04:34:18 +00:00 · 2003-09-04 04:34:18 +00:00 · cc615f34df
commit cc615f34df
parent ae7a7577bc
5 changed files with 38 additions and 22 deletions
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@@ -48,15 +48,12 @@ public class TibetanConverter implements FontConverterConstants {
    static final String rtfErrorMessage
        = "The Rich Text Format (RTF) file selected contains constructs that\nJskad cannot handle.  If you got the RTF file from saving a Word\ndocument as RTF, try saving that same document as RTF in\nWord 2000 instead of Word XP or in Word 97 instead of\nWord 2000.  Older versions of Word produce RTF that Jskad\ncan more easily deal with.  OpenOffice and StarOffice may also\nproduce better-behaved RTF.";

-    static {
-        // No need for the TM or TMW fonts.
-        System.setProperty("thdl.rely.on.system.tmw.fonts", "true");
-        System.setProperty("thdl.rely.on.system.tm.fonts", "true");
-    }
-
    /**
     *  Runs the converter. */
 	public static void main(String[] args) {
+        // No need for the TM or TMW fonts.
+        System.setProperty("thdl.rely.on.system.tmw.fonts", "true");
+        System.setProperty("thdl.rely.on.system.tm.fonts", "true");

        // Runs on Linux/Unix boxes without X11 servers:
        System.setProperty("java.awt.headless", "true");
@@ -108,32 +105,46 @@ public class TibetanConverter implements FontConverterConstants {
                         || (findAllNonTMMode
                             = args[0].equals("--find-all-non-tm"))
                ))) {
-                out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw");
-                out.println("                  | --to-tibetan-machine | --to-tibetan-machine-web");
-                out.println("                  | --to-unicode | --to-wylie | --to-acip] RTF_file");
-                out.println(" | TibetanConverter --acip-to-unicode TXT_file");
+                out.println("TibetanConverter --find-all-non-tmw | --find-some-non-tmw");
+                out.println("                 | --to-tibetan-machine | --to-tibetan-machine-web");
+                out.println("                 | --to-unicode | --to-wylie | --to-acip RTF_file");
+                out.println(" | TibetanConverter --acip-to-unicode | --acip-to-tmw TXT_file");
                out.println(" | TibetanConverter [--version | -v | --help | -h]");
                out.println("");
                out.println("Distributed under the terms of the THDL Open Community License Version 1.0.");
                out.println("");
                out.println("Usage:");
                out.println(" -v | --version for version info");
+                out.println("");
                out.println(" -h | --help for this message");
+                out.println("");
                out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine");
+                out.println("");
                out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
+                out.println("");
                out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb");
+                out.println("");
                out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie");
+                out.println("");
                out.println(" --to-acip to convert TibetanMachineWeb to ACIP");
+                out.println("");
                out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file");
+                out.println("");
+                out.println(" --acip-to-tmw to convert ACIP text file to Tibetan Machine Web RTF File.");
+                out.println("");
                out.println(" --find-all-non-tmw to locate all characters in the input document that are");
                out.println("   not in Tibetan Machine Web fonts, exit zero if and only if none found");
+                out.println("");
                out.println(" --find-some-non-tmw to locate all distinct characters in the input document");
                out.println("   not in Tibetan Machine Web fonts, exit zero if and only if none found");
+                out.println("");
                out.println(" --find-all-non-tm to locate all characters in the input document that are");
                out.println("   not in Tibetan Machine fonts, exit zero if and only if none found");
+                out.println("");
                out.println(" --find-some-non-tm to locate all distinct characters in the input document");
                out.println("   not in Tibetan Machine fonts, exit zero if and only if none found");
                out.println("");
+                out.println("");
                out.println(" In --to... and --acip-to... modes, needs one argument, the name of the");
                out.println(" TibetanMachineWeb RTF");
                out.println(" file (for --to-wylie, --to-unicode, and --to-tibetan-machine) or the name of");
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@@ -132,7 +132,7 @@ public class ACIPConverter {
        throws IOException
    {
        TibetanDocument tdoc = new TibetanDocument();
-		tdoc.setRomanAttributeSet("Courier", 14); // DLC make me configurable.
+		tdoc.setRomanAttributeSet("Courier", 20); // DLC make me configurable.
        boolean rv
            = convertToTMW(scan, tdoc, errors, warnings,
                           writeWarningsToResult, warningLevel);
@@ -393,11 +393,13 @@ public class ACIPConverter {
                            if (!done) {
                                if (null != writer) unicode = ACIPRules.getUnicodeFor(s.getText(), false);
                                if (null != tdoc) {
-                                    if (s.getText().equals("\r") || s.getText().equals("\t") || s.getText().equals("\n")) {
+                                    if (s.getText().equals("\r")
+                                        || s.getText().equals("\t")
+                                        || s.getText().equals("\n")
+                                        || s.getText().equals("\r\n")) {
                                        tdoc.appendRoman(s.getText());
                                        continue;
-                                    }
-                                    else {
+                                    } else {
                                        String wy = ACIPRules.getWylieForACIPOther(s.getText());
                                        if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
                                        duff = new DuffCode[] { TibetanMachineWeb.getGlyph(wy) };
--- a/source/org/thdl/tib/text/ttt/ACIPRules.java
+++ b/source/org/thdl/tib/text/ttt/ACIPRules.java
@@ -222,7 +222,7 @@ class ACIPRules {
     *  mark.  Returns null if there is no such EWTS. */
    static final String getWylieForACIPOther(String acip) {
        if (acipOther2wylie == null) {
-            acipOther2wylie = new HashMap(37);
+            acipOther2wylie = new HashMap(20);

            // DLC FIXME: check all these again.
            acipOther2wylie.put(",", "/");
--- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
@@ -788,11 +788,14 @@ public class ACIPTshegBarScanner {
                // Don't add in a "\r\n" or "\n" unless there's a
                // blank line.
                boolean rn = false;
+                boolean realNewline = false;
                if (('\n' != ch && '\r' != ch)
-                    || ((rn = ('\n' == ch && i >= 3 && s.charAt(i-3) == '\r' && s.charAt(i-2) == '\n' && s.charAt(i-1) == '\r'))
-                        || ('\n' == ch && i >= 1 && s.charAt(i-1) == '\n'))) {
-                    al.add(new ACIPString(rn ? s.substring(i - 1, i+1) : s.substring(i, i+1),
-                                          ACIPString.TIBETAN_PUNCTUATION));
+                    || (realNewline
+                        = ((rn = ('\n' == ch && i >= 3 && s.charAt(i-3) == '\r' && s.charAt(i-2) == '\n' && s.charAt(i-1) == '\r'))
+                           || ('\n' == ch && i >= 1 && s.charAt(i-1) == '\n')))) {
+                    for (int h = 0; h < (realNewline ? 2 : 1); h++)
+                        al.add(new ACIPString(rn ? s.substring(i - 1, i+1) : s.substring(i, i+1),
+                                              ACIPString.TIBETAN_PUNCTUATION));
                }
                startOfString = i+1;
                currentType = ACIPString.ERROR;
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@@ -7195,11 +7195,11 @@ tstHelper("ZUR");

        shelp("KA KHA\n\nGA NGA \nTA THA\n\nDA NA\n",
              "",
-              "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{TA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{THA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{DA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NA}, TIBETAN_PUNCTUATION:{ }]");
+              "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{\n}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{TA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{THA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{\n}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{DA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NA}, TIBETAN_PUNCTUATION:{ }]");

        shelp("[FIRST][SECOND][MISSING PAGE][MISSING FOLIO]", "");
-        shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r\n}]");
-        shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r\n}]");
+        shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r\n}, TIBETAN_PUNCTUATION:{\r\n}]");
+        shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r\n}, TIBETAN_PUNCTUATION:{\r\n}]");

        // Test folio markers:
        shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");