diff --git a/source/options.txt b/source/options.txt index f6d1d69..988d4dd 100644 --- a/source/options.txt +++ b/source/options.txt @@ -149,8 +149,7 @@ thdl.do.not.fix.rtf.hex.escapes = false # see warning 501 even at the "Some" level, just change the option # thdl.acip.to.tibetan.warning.severity.501 to Some. You cannot make # a warning into an error, and you cannot make an error into a -# warning. 504 and 510 cannot be downgraded; they are always -# "Some"-level. +# warning. thdl.acip.to.tibetan.warning.severity.501 = Most thdl.acip.to.tibetan.warning.severity.502 = All thdl.acip.to.tibetan.warning.severity.503 = All diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java index 34b763a..738d81e 100644 --- a/source/org/thdl/tib/input/TibetanConverter.java +++ b/source/org/thdl/tib/input/TibetanConverter.java @@ -292,7 +292,8 @@ public class TibetanConverter implements FontConverterConstants { = ACIPTshegBarScanner.scanStream(in, null, ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have", 1000 - 1), - shortMessages); + shortMessages, + warningLevel); if (null == al) return 47; boolean embeddedWarnings = (warningLevel != "None"); diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index 7daa69c..28bee50 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -333,14 +333,15 @@ public class TibTextUtils implements THDLWylieConstants { throws InvalidACIPException { StringBuffer errors = new StringBuffer(); - ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false); + String warningLevel = withWarnings ? 
"All" : "None"; + ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false, + warningLevel); if (null == al || errors.length() > 0) { if (errors.length() > 0) throw new InvalidACIPException(errors.toString()); else throw new InvalidACIPException("Fatal error converting ACIP to TMW."); } - String warningLevel = withWarnings ? "All" : "None"; boolean colors = withWarnings; boolean putWarningsInOutput = false; if ("None" != warningLevel) { diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index efefbdb..6c3c8c5 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -66,7 +66,11 @@ public class ACIPConverter { StringBuffer errors = new StringBuffer(); int maxErrors = 1000; // FIXME: make this PER CAPITA or else large ACIP Tibetan files are not converted for fear that they are English boolean shortMessages = false; - ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1, shortMessages); + String warningLevel = "Most"; + ArrayList al + = ACIPTshegBarScanner.scanFile(args[0], errors, + maxErrors - 1, shortMessages, + warningLevel); if (null == al) { System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this"); @@ -90,7 +94,6 @@ public class ACIPConverter { } } - String warningLevel = "Most"; boolean colors = true; StringBuffer warnings = null; boolean putWarningsInOutput = false; @@ -200,7 +203,8 @@ public class ACIPConverter { String warningLevel, boolean shortMessages) { ByteArrayOutputStream sw = new ByteArrayOutputStream(); - ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages); + ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages, + warningLevel); try { if (null != al) { convertToUnicodeText(al, sw, errors, diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java 
index dabbc86..df087b7 100644 --- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java +++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java @@ -51,7 +51,8 @@ public class ACIPTshegBarScanner { StringBuffer errors = new StringBuffer(); int maxErrors = 1000; ArrayList al = scanFile(args[0], errors, maxErrors - 1, - "true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages"))); + "true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages")), + "All" /* memory hog */); if (null == al) { System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this"); @@ -82,29 +83,38 @@ public class ACIPTshegBarScanner { *

FIXME: not so efficient; copies the whole file into memory * first. * + * @param warningLevel controls which lexical warnings you will + * encounter + * * @throws IOException if we cannot read in the ACIP input file * */ public static ArrayList scanFile(String fname, StringBuffer errors, - int maxErrors, boolean shortMessages) + int maxErrors, boolean shortMessages, + String warningLevel) throws IOException { return scanStream(new FileInputStream(fname), - errors, maxErrors, shortMessages); + errors, maxErrors, shortMessages, warningLevel); } /** Scans a stream of ACIP into tsheg bars. If errors is * non-null, error messages will be appended to it. You can - * recover both errors and warnings (modulo offset information) - * from the result, though. They will be short messages iff - * shortMessages is true. Returns a list of TStrings that is the - * scan, or null if more than maxErrors occur. + * recover both errors and (optionally) warnings (modulo offset + * information) from the result, though. They will be short + * messages iff shortMessages is true. Returns a list of + * TStrings that is the scan, or null if more than maxErrors + * occur. * *

FIXME: not so efficient; copies the whole file into memory * first. * + * @param warningLevel controls which lexical warnings you will + * encounter + * * @throws IOException if we cannot read the whole ACIP stream */ public static ArrayList scanStream(InputStream stream, StringBuffer errors, - int maxErrors, boolean shortMessages) + int maxErrors, boolean shortMessages, + String warningLevel) throws IOException { StringBuffer s = new StringBuffer(); @@ -117,7 +127,8 @@ public class ACIPTshegBarScanner { s.append(ch, 0, amt); } in.close(); - return scan(s.toString(), errors, maxErrors, shortMessages); + return scan(s.toString(), errors, maxErrors, shortMessages, + warningLevel); } /** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the @@ -196,7 +207,7 @@ public class ACIPTshegBarScanner { * @return null if more than maxErrors errors occur, or the scan * otherwise */ public static ArrayList scan(String s, StringBuffer errors, int maxErrors, - boolean shortMessages) { + boolean shortMessages, String warningLevel) { // FIXME: Use less memory and time by not adding in the // warnings that are below threshold. @@ -787,11 +798,12 @@ public class ACIPTshegBarScanner { // or ., or [A-Za-z] follows '.'. al.add(new TString("ACIP", s.substring(i, i+1), TString.TIBETAN_PUNCTUATION)); - if (!(i + 1 < sl - && (s.charAt(i+1) == '.' || s.charAt(i+1) == ',' - || (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n') - || (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z') - || (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) { + if (ErrorsAndWarnings.isEnabled(510, warningLevel) + && (!(i + 1 < sl + && (s.charAt(i+1) == '.' 
|| s.charAt(i+1) == ',' + || (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n') + || (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z') + || (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z'))))) { al.add(new TString("ACIP", ErrorsAndWarnings.getMessage(510, shortMessages, @@ -900,7 +912,8 @@ public class ACIPTshegBarScanner { } } } - if ('%' == ch) { + if ('%' == ch + && ErrorsAndWarnings.isEnabled(504, warningLevel)) { al.add(new TString("ACIP", ErrorsAndWarnings.getMessage(504, shortMessages, diff --git a/source/org/thdl/tib/text/ttt/ErrorsAndWarnings.java b/source/org/thdl/tib/text/ttt/ErrorsAndWarnings.java index f5c994f..b7c1c37 100644 --- a/source/org/thdl/tib/text/ttt/ErrorsAndWarnings.java +++ b/source/org/thdl/tib/text/ttt/ErrorsAndWarnings.java @@ -46,15 +46,16 @@ public class ErrorsAndWarnings { private static HashMap severityMap = new HashMap(); static { - setupSeverityMapFromBuiltinDefaults(); + setupSeverityMap(); } /** Returns higher numbers for higher severity. */ private static int severityStringToInteger(String sev) { if (sev == "ERROR") return Integer.MAX_VALUE; - if (sev == "Some") return Integer.MAX_VALUE - 1; - if (sev == "Most") return Integer.MAX_VALUE - 2; - if (sev == "All") return Integer.MAX_VALUE - 3; + if (sev == "None") return Integer.MAX_VALUE - 1; + if (sev == "Some") return Integer.MAX_VALUE - 2; + if (sev == "Most") return Integer.MAX_VALUE - 3; + if (sev == "All") return Integer.MAX_VALUE - 4; return 0; } /** Returns true if and only if sev1 is at least as severe as @@ -306,7 +307,12 @@ public class ErrorsAndWarnings { private static final int MIN_WARNING = 501; // inclusive private static final int MAX_WARNING = 511; // inclusive - private static void setupSeverityMapFromBuiltinDefaults() { + /** Call this ONLY when testing unless you think hard about it. 
+ Reinitializes the severities of all warnings and errors using + user preferences and falling back on built-in defaults if + necessary (which it shouldn't be -- options.txt should be in + the JAR with this class file). */ + static void setupSeverityMap() { // errors: for (int i = MIN_ERROR; i <= MAX_ERROR; i++) { severityMap.put(new Integer(i), "ERROR"); @@ -356,7 +362,6 @@ public class ErrorsAndWarnings { } // DLC FIXME: make 506 an error? or a new, super-high priority class of warning? - // DLC FIXME: you can't turn 504 or 510 down (e.g., to an "All"-level warning) } /** Prints out the long forms of the error messages, which will diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index 4d8c26a..6de88b4 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -59,7 +59,7 @@ public class PackageTest extends TestCase { which may be an error message. */ static String ACIP2TMW2ACIP(String ACIP) { StringBuffer errors = new StringBuffer(); - ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false); + ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false, "None"); if (null == al || errors.length() > 0) return null; org.thdl.tib.text.TibetanDocument tdoc @@ -7207,8 +7207,12 @@ tstHelper("ZUR"); } private static void shelp(String s, String expectedErrors, String expectedScan) { + shelp(s, expectedErrors, expectedScan, "All"); + } + + private static void shelp(String s, String expectedErrors, String expectedScan, String warningLevel) { StringBuffer errors = new StringBuffer(); - ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false); + ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false, warningLevel); if (null != expectedScan) { if (!al.toString().equals(expectedScan)) { System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:"); @@ -7346,7 +7350,15 @@ tstHelper("ZUR"); 
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]"); - shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]"); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]", "Some"); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}]", "None"); + ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.504", "All"); + ErrorsAndWarnings.setupSeverityMap(); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}]", "Most"); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]", "All"); + ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.504", "Some"); // back to the default value + ErrorsAndWarnings.setupSeverityMap(); + shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]"); shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]");