From cc615f34dfbe913270e60626fd20b78ff2dd9911 Mon Sep 17 00:00:00 2001 From: dchandler Date: Thu, 4 Sep 2003 04:34:18 +0000 Subject: [PATCH] ACIP->TMW and ACIP->Unicode have my pre-stamp of non-approval. Except for {NYAx} and {NYAo}, they're as good as I'll get them without input from experts or the employ of a complementary, syllabary-based approach. --- .../org/thdl/tib/input/TibetanConverter.java | 31 +++++++++++++------ .../org/thdl/tib/text/ttt/ACIPConverter.java | 10 +++--- source/org/thdl/tib/text/ttt/ACIPRules.java | 2 +- .../tib/text/ttt/ACIPTshegBarScanner.java | 11 ++++--- source/org/thdl/tib/text/ttt/PackageTest.java | 6 ++-- 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java index 9379179..c07b0d2 100644 --- a/source/org/thdl/tib/input/TibetanConverter.java +++ b/source/org/thdl/tib/input/TibetanConverter.java @@ -48,15 +48,12 @@ public class TibetanConverter implements FontConverterConstants { static final String rtfErrorMessage = "The Rich Text Format (RTF) file selected contains constructs that\nJskad cannot handle. If you got the RTF file from saving a Word\ndocument as RTF, try saving that same document as RTF in\nWord 2000 instead of Word XP or in Word 97 instead of\nWord 2000. Older versions of Word produce RTF that Jskad\ncan more easily deal with. OpenOffice and StarOffice may also\nproduce better-behaved RTF."; - static { - // No need for the TM or TMW fonts. - System.setProperty("thdl.rely.on.system.tmw.fonts", "true"); - System.setProperty("thdl.rely.on.system.tm.fonts", "true"); - } - /** * Runs the converter. */ public static void main(String[] args) { + // No need for the TM or TMW fonts. 
+ System.setProperty("thdl.rely.on.system.tmw.fonts", "true"); + System.setProperty("thdl.rely.on.system.tm.fonts", "true"); // Runs on Linux/Unix boxes without X11 servers: System.setProperty("java.awt.headless", "true"); @@ -108,32 +105,46 @@ public class TibetanConverter implements FontConverterConstants { || (findAllNonTMMode = args[0].equals("--find-all-non-tm")) ))) { - out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw"); - out.println(" | --to-tibetan-machine | --to-tibetan-machine-web"); - out.println(" | --to-unicode | --to-wylie | --to-acip] RTF_file"); - out.println(" | TibetanConverter --acip-to-unicode TXT_file"); + out.println("TibetanConverter --find-all-non-tmw | --find-some-non-tmw"); + out.println(" | --to-tibetan-machine | --to-tibetan-machine-web"); + out.println(" | --to-unicode | --to-wylie | --to-acip RTF_file"); + out.println(" | TibetanConverter --acip-to-unicode | --acip-to-tmw TXT_file"); out.println(" | TibetanConverter [--version | -v | --help | -h]"); out.println(""); out.println("Distributed under the terms of the THDL Open Community License Version 1.0."); out.println(""); out.println("Usage:"); out.println(" -v | --version for version info"); + out.println(""); out.println(" -h | --help for this message"); + out.println(""); out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine"); + out.println(""); out.println(" --to-unicode to convert TibetanMachineWeb to Unicode"); + out.println(""); out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb"); + out.println(""); out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie"); + out.println(""); out.println(" --to-acip to convert TibetanMachineWeb to ACIP"); + out.println(""); out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file"); + out.println(""); + out.println(" --acip-to-tmw to convert ACIP text file to Tibetan Machine Web RTF File."); + out.println(""); 
out.println(" --find-all-non-tmw to locate all characters in the input document that are"); out.println(" not in Tibetan Machine Web fonts, exit zero if and only if none found"); + out.println(""); out.println(" --find-some-non-tmw to locate all distinct characters in the input document"); out.println(" not in Tibetan Machine Web fonts, exit zero if and only if none found"); + out.println(""); out.println(" --find-all-non-tm to locate all characters in the input document that are"); out.println(" not in Tibetan Machine fonts, exit zero if and only if none found"); + out.println(""); out.println(" --find-some-non-tm to locate all distinct characters in the input document"); out.println(" not in Tibetan Machine fonts, exit zero if and only if none found"); out.println(""); + out.println(""); out.println(" In --to... and --acip-to... modes, needs one argument, the name of the"); out.println(" TibetanMachineWeb RTF"); out.println(" file (for --to-wylie, --to-unicode, and --to-tibetan-machine) or the name of"); diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index a6c4ea3..35c06d7 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -132,7 +132,7 @@ public class ACIPConverter { throws IOException { TibetanDocument tdoc = new TibetanDocument(); - tdoc.setRomanAttributeSet("Courier", 14); // DLC make me configurable. + tdoc.setRomanAttributeSet("Courier", 20); // DLC make me configurable. 
boolean rv = convertToTMW(scan, tdoc, errors, warnings, writeWarningsToResult, warningLevel); @@ -393,11 +393,13 @@ public class ACIPConverter { if (!done) { if (null != writer) unicode = ACIPRules.getUnicodeFor(s.getText(), false); if (null != tdoc) { - if (s.getText().equals("\r") || s.getText().equals("\t") || s.getText().equals("\n")) { + if (s.getText().equals("\r") + || s.getText().equals("\t") + || s.getText().equals("\n") + || s.getText().equals("\r\n")) { tdoc.appendRoman(s.getText()); continue; - } - else { + } else { String wy = ACIPRules.getWylieForACIPOther(s.getText()); if (null == wy) throw new Error("No wylie for ACIP " + s.getText()); duff = new DuffCode[] { TibetanMachineWeb.getGlyph(wy) }; diff --git a/source/org/thdl/tib/text/ttt/ACIPRules.java b/source/org/thdl/tib/text/ttt/ACIPRules.java index d01945e..92be611 100644 --- a/source/org/thdl/tib/text/ttt/ACIPRules.java +++ b/source/org/thdl/tib/text/ttt/ACIPRules.java @@ -222,7 +222,7 @@ class ACIPRules { * mark. Returns null if there is no such EWTS. */ static final String getWylieForACIPOther(String acip) { if (acipOther2wylie == null) { - acipOther2wylie = new HashMap(37); + acipOther2wylie = new HashMap(20); // DLC FIXME: check all these again. acipOther2wylie.put(",", "/"); diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java index 67aeb18..5f323f8 100644 --- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java +++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java @@ -788,11 +788,14 @@ public class ACIPTshegBarScanner { // Don't add in a "\r\n" or "\n" unless there's a // blank line. boolean rn = false; + boolean realNewline = false; if (('\n' != ch && '\r' != ch) - || ((rn = ('\n' == ch && i >= 3 && s.charAt(i-3) == '\r' && s.charAt(i-2) == '\n' && s.charAt(i-1) == '\r')) - || ('\n' == ch && i >= 1 && s.charAt(i-1) == '\n'))) { - al.add(new ACIPString(rn ? 
s.substring(i - 1, i+1) : s.substring(i, i+1), - ACIPString.TIBETAN_PUNCTUATION)); + || (realNewline + = ((rn = ('\n' == ch && i >= 3 && s.charAt(i-3) == '\r' && s.charAt(i-2) == '\n' && s.charAt(i-1) == '\r')) + || ('\n' == ch && i >= 1 && s.charAt(i-1) == '\n')))) { + for (int h = 0; h < (realNewline ? 2 : 1); h++) + al.add(new ACIPString(rn ? s.substring(i - 1, i+1) : s.substring(i, i+1), + ACIPString.TIBETAN_PUNCTUATION)); } startOfString = i+1; currentType = ACIPString.ERROR; diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index 80fe4a1..af5c19e 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -7195,11 +7195,11 @@ tstHelper("ZUR"); shelp("KA KHA\n\nGA NGA \nTA THA\n\nDA NA\n", "", - "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{TA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{THA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{DA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NA}, TIBETAN_PUNCTUATION:{ }]"); + "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{\n}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{TA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{THA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{\n}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{DA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NA}, TIBETAN_PUNCTUATION:{ }]"); shelp("[FIRST][SECOND][MISSING PAGE][MISSING FOLIO]", ""); - shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY 
LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r\n}]"); - shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r\n}]"); + shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r\n}, TIBETAN_PUNCTUATION:{\r\n}]"); + shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r\n}, TIBETAN_PUNCTUATION:{\r\n}]"); // Test folio markers: shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");