From 1a055f3472ce929387e531fa2f2b7e56f5a66302 Mon Sep 17 00:00:00 2001 From: dchandler Date: Sun, 25 Apr 2004 00:37:57 +0000 Subject: [PATCH] I don't think warning level "None" was really doing the trick. Fixed that. You can now customize the severities of all warnings, even 504 and 510. When warning level is "None", scanning, i.e. lexical analysis, is faster. --- source/options.txt | 3 +- .../org/thdl/tib/input/TibetanConverter.java | 3 +- source/org/thdl/tib/text/TibTextUtils.java | 5 ++- .../org/thdl/tib/text/ttt/ACIPConverter.java | 10 +++-- .../tib/text/ttt/ACIPTshegBarScanner.java | 45 ++++++++++++------- .../thdl/tib/text/ttt/ErrorsAndWarnings.java | 17 ++++--- source/org/thdl/tib/text/ttt/PackageTest.java | 18 ++++++-- 7 files changed, 68 insertions(+), 33 deletions(-) diff --git a/source/options.txt b/source/options.txt index f6d1d69..988d4dd 100644 --- a/source/options.txt +++ b/source/options.txt @@ -149,8 +149,7 @@ thdl.do.not.fix.rtf.hex.escapes = false # see warning 501 even at the "Some" level, just change the option # thdl.acip.to.tibetan.warning.severity.501 to Some. You cannot make # a warning into an error, and you cannot make an error into a -# warning. 504 and 510 cannot be downgraded; they are always -# "Some"-level. +# warning. 
thdl.acip.to.tibetan.warning.severity.501 = Most thdl.acip.to.tibetan.warning.severity.502 = All thdl.acip.to.tibetan.warning.severity.503 = All diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java index 34b763a..738d81e 100644 --- a/source/org/thdl/tib/input/TibetanConverter.java +++ b/source/org/thdl/tib/input/TibetanConverter.java @@ -292,7 +292,8 @@ public class TibetanConverter implements FontConverterConstants { = ACIPTshegBarScanner.scanStream(in, null, ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have", 1000 - 1), - shortMessages); + shortMessages, + warningLevel); if (null == al) return 47; boolean embeddedWarnings = (warningLevel != "None"); diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index 7daa69c..28bee50 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -333,14 +333,15 @@ public class TibTextUtils implements THDLWylieConstants { throws InvalidACIPException { StringBuffer errors = new StringBuffer(); - ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false); + String warningLevel = withWarnings ? "All" : "None"; + ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false, + warningLevel); if (null == al || errors.length() > 0) { if (errors.length() > 0) throw new InvalidACIPException(errors.toString()); else throw new InvalidACIPException("Fatal error converting ACIP to TMW."); } - String warningLevel = withWarnings ? 
"All" : "None"; boolean colors = withWarnings; boolean putWarningsInOutput = false; if ("None" != warningLevel) { diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index efefbdb..6c3c8c5 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -66,7 +66,11 @@ public class ACIPConverter { StringBuffer errors = new StringBuffer(); int maxErrors = 1000; // FIXME: make this PER CAPITA or else large ACIP Tibetan files are not converted for fear that they are English boolean shortMessages = false; - ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1, shortMessages); + String warningLevel = "Most"; + ArrayList al + = ACIPTshegBarScanner.scanFile(args[0], errors, + maxErrors - 1, shortMessages, + warningLevel); if (null == al) { System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this"); @@ -90,7 +94,6 @@ public class ACIPConverter { } } - String warningLevel = "Most"; boolean colors = true; StringBuffer warnings = null; boolean putWarningsInOutput = false; @@ -200,7 +203,8 @@ public class ACIPConverter { String warningLevel, boolean shortMessages) { ByteArrayOutputStream sw = new ByteArrayOutputStream(); - ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages); + ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages, + warningLevel); try { if (null != al) { convertToUnicodeText(al, sw, errors, diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java index dabbc86..df087b7 100644 --- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java +++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java @@ -51,7 +51,8 @@ public class ACIPTshegBarScanner { StringBuffer errors = new StringBuffer(); int maxErrors = 1000; ArrayList al = scanFile(args[0], errors, maxErrors - 1, - 
"true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages"))); + "true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages")), + "All" /* memory hog */); if (null == al) { System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this"); @@ -82,29 +83,38 @@ public class ACIPTshegBarScanner { *

FIXME: not so efficient; copies the whole file into memory * first. * + * @param warningLevel controls which lexical warnings you will + * encounter + * * @throws IOException if we cannot read in the ACIP input file * */ public static ArrayList scanFile(String fname, StringBuffer errors, - int maxErrors, boolean shortMessages) + int maxErrors, boolean shortMessages, + String warningLevel) throws IOException { return scanStream(new FileInputStream(fname), - errors, maxErrors, shortMessages); + errors, maxErrors, shortMessages, warningLevel); } /** Scans a stream of ACIP into tsheg bars. If errors is * non-null, error messages will be appended to it. You can - * recover both errors and warnings (modulo offset information) - * from the result, though. They will be short messages iff - * shortMessages is true. Returns a list of TStrings that is the - * scan, or null if more than maxErrors occur. + * recover both errors and (optionally) warnings (modulo offset + * information) from the result, though. They will be short + * messages iff shortMessages is true. Returns a list of + * TStrings that is the scan, or null if more than maxErrors + * occur. * *

FIXME: not so efficient; copies the whole file into memory * first. * + * @param warningLevel controls which lexical warnings you will + * encounter + * * @throws IOException if we cannot read the whole ACIP stream */ public static ArrayList scanStream(InputStream stream, StringBuffer errors, - int maxErrors, boolean shortMessages) + int maxErrors, boolean shortMessages, + String warningLevel) throws IOException { StringBuffer s = new StringBuffer(); @@ -117,7 +127,8 @@ public class ACIPTshegBarScanner { s.append(ch, 0, amt); } in.close(); - return scan(s.toString(), errors, maxErrors, shortMessages); + return scan(s.toString(), errors, maxErrors, shortMessages, + warningLevel); } /** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the @@ -196,7 +207,7 @@ public class ACIPTshegBarScanner { * @return null if more than maxErrors errors occur, or the scan * otherwise */ public static ArrayList scan(String s, StringBuffer errors, int maxErrors, - boolean shortMessages) { + boolean shortMessages, String warningLevel) { // FIXME: Use less memory and time by not adding in the // warnings that are below threshold. @@ -787,11 +798,12 @@ public class ACIPTshegBarScanner { // or ., or [A-Za-z] follows '.'. al.add(new TString("ACIP", s.substring(i, i+1), TString.TIBETAN_PUNCTUATION)); - if (!(i + 1 < sl - && (s.charAt(i+1) == '.' || s.charAt(i+1) == ',' - || (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n') - || (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z') - || (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) { + if (ErrorsAndWarnings.isEnabled(510, warningLevel) + && (!(i + 1 < sl + && (s.charAt(i+1) == '.' 
|| s.charAt(i+1) == ',' + || (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n') + || (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z') + || (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z'))))) { al.add(new TString("ACIP", ErrorsAndWarnings.getMessage(510, shortMessages, @@ -900,7 +912,8 @@ public class ACIPTshegBarScanner { } } } - if ('%' == ch) { + if ('%' == ch + && ErrorsAndWarnings.isEnabled(504, warningLevel)) { al.add(new TString("ACIP", ErrorsAndWarnings.getMessage(504, shortMessages, diff --git a/source/org/thdl/tib/text/ttt/ErrorsAndWarnings.java b/source/org/thdl/tib/text/ttt/ErrorsAndWarnings.java index f5c994f..b7c1c37 100644 --- a/source/org/thdl/tib/text/ttt/ErrorsAndWarnings.java +++ b/source/org/thdl/tib/text/ttt/ErrorsAndWarnings.java @@ -46,15 +46,16 @@ public class ErrorsAndWarnings { private static HashMap severityMap = new HashMap(); static { - setupSeverityMapFromBuiltinDefaults(); + setupSeverityMap(); } /** Returns higher numbers for higher severity. */ private static int severityStringToInteger(String sev) { if (sev == "ERROR") return Integer.MAX_VALUE; - if (sev == "Some") return Integer.MAX_VALUE - 1; - if (sev == "Most") return Integer.MAX_VALUE - 2; - if (sev == "All") return Integer.MAX_VALUE - 3; + if (sev == "None") return Integer.MAX_VALUE - 1; + if (sev == "Some") return Integer.MAX_VALUE - 2; + if (sev == "Most") return Integer.MAX_VALUE - 3; + if (sev == "All") return Integer.MAX_VALUE - 4; return 0; } /** Returns true if and only if sev1 is at least as severe as @@ -306,7 +307,12 @@ public class ErrorsAndWarnings { private static final int MIN_WARNING = 501; // inclusive private static final int MAX_WARNING = 511; // inclusive - private static void setupSeverityMapFromBuiltinDefaults() { + /** Call this ONLY when testing unless you think hard about it. 
+ Reinitializes the severities of all warnings and errors using + user preferences and falling back on built-in defaults if + necessary (which it shouldn't be -- options.txt should be in + the JAR with this class file). */ + static void setupSeverityMap() { // errors: for (int i = MIN_ERROR; i <= MAX_ERROR; i++) { severityMap.put(new Integer(i), "ERROR"); @@ -356,7 +362,6 @@ public class ErrorsAndWarnings { } // DLC FIXME: make 506 an error? or a new, super-high priority class of warning? - // DLC FIXME: you can't turn 504 or 510 down (e.g., to an "All"-level warning) } /** Prints out the long forms of the error messages, which will diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index 4d8c26a..6de88b4 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -59,7 +59,7 @@ public class PackageTest extends TestCase { which may be an error message. */ static String ACIP2TMW2ACIP(String ACIP) { StringBuffer errors = new StringBuffer(); - ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false); + ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false, "None"); if (null == al || errors.length() > 0) return null; org.thdl.tib.text.TibetanDocument tdoc @@ -7207,8 +7207,12 @@ tstHelper("ZUR"); } private static void shelp(String s, String expectedErrors, String expectedScan) { + shelp(s, expectedErrors, expectedScan, "All"); + } + + private static void shelp(String s, String expectedErrors, String expectedScan, String warningLevel) { StringBuffer errors = new StringBuffer(); - ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false); + ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false, warningLevel); if (null != expectedScan) { if (!al.toString().equals(expectedScan)) { System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:"); @@ -7346,7 +7350,15 @@ tstHelper("ZUR");
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]"); - shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]"); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]", "Some"); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}]", "None"); + ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.504", "All"); + ErrorsAndWarnings.setupSeverityMap(); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}]", "Most"); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]", "All"); + ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.504", "Some"); // back to the default value + ErrorsAndWarnings.setupSeverityMap(); + shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]"); shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]");