I don't think warning level "None" was really doing the trick. Fixed that.

You can now customize the severities of all warnings, even 504 and 510.

When warning level is "None", scanning, i.e. lexical analysis, is faster.
This commit is contained in:
dchandler 2004-04-25 00:37:57 +00:00
parent e2d42f36eb
commit 1a055f3472
7 changed files with 68 additions and 33 deletions

View File

@ -149,8 +149,7 @@ thdl.do.not.fix.rtf.hex.escapes = false
# see warning 501 even at the "Some" level, just change the option
# thdl.acip.to.tibetan.warning.severity.501 to Some. You cannot make
# a warning into an error, and you cannot make an error into a
# warning. 504 and 510 cannot be downgraded; they are always
# "Some"-level.
# warning.
thdl.acip.to.tibetan.warning.severity.501 = Most
thdl.acip.to.tibetan.warning.severity.502 = All
thdl.acip.to.tibetan.warning.severity.503 = All

View File

@ -292,7 +292,8 @@ public class TibetanConverter implements FontConverterConstants {
= ACIPTshegBarScanner.scanStream(in, null,
ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have",
1000 - 1),
shortMessages);
shortMessages,
warningLevel);
if (null == al)
return 47;
boolean embeddedWarnings = (warningLevel != "None");

View File

@ -333,14 +333,15 @@ public class TibTextUtils implements THDLWylieConstants {
throws InvalidACIPException
{
StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false);
String warningLevel = withWarnings ? "All" : "None";
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false,
warningLevel);
if (null == al || errors.length() > 0) {
if (errors.length() > 0)
throw new InvalidACIPException(errors.toString());
else
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
}
String warningLevel = withWarnings ? "All" : "None";
boolean colors = withWarnings;
boolean putWarningsInOutput = false;
if ("None" != warningLevel) {

View File

@ -66,7 +66,11 @@ public class ACIPConverter {
StringBuffer errors = new StringBuffer();
int maxErrors = 1000; // FIXME: make this PER CAPITA or else large ACIP Tibetan files are not converted for fear that they are English
boolean shortMessages = false;
ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1, shortMessages);
String warningLevel = "Most";
ArrayList al
= ACIPTshegBarScanner.scanFile(args[0], errors,
maxErrors - 1, shortMessages,
warningLevel);
if (null == al) {
System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
@ -90,7 +94,6 @@ public class ACIPConverter {
}
}
String warningLevel = "Most";
boolean colors = true;
StringBuffer warnings = null;
boolean putWarningsInOutput = false;
@ -200,7 +203,8 @@ public class ACIPConverter {
String warningLevel,
boolean shortMessages) {
ByteArrayOutputStream sw = new ByteArrayOutputStream();
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages);
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages,
warningLevel);
try {
if (null != al) {
convertToUnicodeText(al, sw, errors,

View File

@ -51,7 +51,8 @@ public class ACIPTshegBarScanner {
StringBuffer errors = new StringBuffer();
int maxErrors = 1000;
ArrayList al = scanFile(args[0], errors, maxErrors - 1,
"true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages")));
"true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages")),
"All" /* memory hog */);
if (null == al) {
System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
@ -82,29 +83,38 @@ public class ACIPTshegBarScanner {
* <p>FIXME: not so efficient; copies the whole file into memory
* first.
*
* @param warningLevel controls which lexical warnings you will
* encounter
*
* @throws IOException if we cannot read in the ACIP input file
* */
public static ArrayList scanFile(String fname, StringBuffer errors,
int maxErrors, boolean shortMessages)
int maxErrors, boolean shortMessages,
String warningLevel)
throws IOException
{
return scanStream(new FileInputStream(fname),
errors, maxErrors, shortMessages);
errors, maxErrors, shortMessages, warningLevel);
}
/** Scans a stream of ACIP into tsheg bars. If errors is
* non-null, error messages will be appended to it. You can
* recover both errors and warnings (modulo offset information)
* from the result, though. They will be short messages iff
* shortMessages is true. Returns a list of TStrings that is the
* scan, or null if more than maxErrors occur.
* recover both errors and (optionally) warnings (modulo offset
* information) from the result, though. They will be short
* messages iff shortMessages is true. Returns a list of
* TStrings that is the scan, or null if more than maxErrors
* occur.
*
* <p>FIXME: not so efficient; copies the whole file into memory
* first.
*
* @param warningLevel controls which lexical warnings you will
* encounter
*
* @throws IOException if we cannot read the whole ACIP stream */
public static ArrayList scanStream(InputStream stream, StringBuffer errors,
int maxErrors, boolean shortMessages)
int maxErrors, boolean shortMessages,
String warningLevel)
throws IOException
{
StringBuffer s = new StringBuffer();
@ -117,7 +127,8 @@ public class ACIPTshegBarScanner {
s.append(ch, 0, amt);
}
in.close();
return scan(s.toString(), errors, maxErrors, shortMessages);
return scan(s.toString(), errors, maxErrors, shortMessages,
warningLevel);
}
/** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the
@ -196,7 +207,7 @@ public class ACIPTshegBarScanner {
* @return null if more than maxErrors errors occur, or the scan
* otherwise */
public static ArrayList scan(String s, StringBuffer errors, int maxErrors,
boolean shortMessages) {
boolean shortMessages, String warningLevel) {
// FIXME: Use less memory and time by not adding in the
// warnings that are below threshold.
@ -787,11 +798,12 @@ public class ACIPTshegBarScanner {
// or ., or [A-Za-z] follows '.'.
al.add(new TString("ACIP", s.substring(i, i+1),
TString.TIBETAN_PUNCTUATION));
if (!(i + 1 < sl
&& (s.charAt(i+1) == '.' || s.charAt(i+1) == ','
|| (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n')
|| (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z')
|| (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) {
if (ErrorsAndWarnings.isEnabled(510, warningLevel)
&& (!(i + 1 < sl
&& (s.charAt(i+1) == '.' || s.charAt(i+1) == ','
|| (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n')
|| (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z')
|| (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z'))))) {
al.add(new TString("ACIP",
ErrorsAndWarnings.getMessage(510,
shortMessages,
@ -900,7 +912,8 @@ public class ACIPTshegBarScanner {
}
}
}
if ('%' == ch) {
if ('%' == ch
&& ErrorsAndWarnings.isEnabled(504, warningLevel)) {
al.add(new TString("ACIP",
ErrorsAndWarnings.getMessage(504,
shortMessages,

View File

@ -46,15 +46,16 @@ public class ErrorsAndWarnings {
private static HashMap severityMap = new HashMap();
static {
setupSeverityMapFromBuiltinDefaults();
setupSeverityMap();
}
/** Returns higher numbers for higher severity. */
private static int severityStringToInteger(String sev) {
if (sev == "ERROR") return Integer.MAX_VALUE;
if (sev == "Some") return Integer.MAX_VALUE - 1;
if (sev == "Most") return Integer.MAX_VALUE - 2;
if (sev == "All") return Integer.MAX_VALUE - 3;
if (sev == "None") return Integer.MAX_VALUE - 1;
if (sev == "Some") return Integer.MAX_VALUE - 2;
if (sev == "Most") return Integer.MAX_VALUE - 3;
if (sev == "All") return Integer.MAX_VALUE - 4;
return 0;
}
/** Returns true if and only if sev1 is at least as severe as
@ -306,7 +307,12 @@ public class ErrorsAndWarnings {
private static final int MIN_WARNING = 501; // inclusive
private static final int MAX_WARNING = 511; // inclusive
private static void setupSeverityMapFromBuiltinDefaults() {
/** Call this ONLY when testing unless you think hard about it.
Reinitializes the severities of all warnings and errors using
user preferences and falling back on built-in defaults if
necessary (which it shouldn't be -- options.txt should be in
the JAR with this class file). */
static void setupSeverityMap() {
// errors:
for (int i = MIN_ERROR; i <= MAX_ERROR; i++) {
severityMap.put(new Integer(i), "ERROR");
@ -356,7 +362,6 @@ public class ErrorsAndWarnings {
}
// DLC FIXME: make 506 an error? or a new, super-high priority class of warning?
// DLC FIXME: you can't turn 504 or 510 down (e.g., to an "All"-level warning)
}
/** Prints out the long forms of the error messages, which will

View File

@ -59,7 +59,7 @@ public class PackageTest extends TestCase {
which may be an error message. */
static String ACIP2TMW2ACIP(String ACIP) {
StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false);
ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false, "None");
if (null == al || errors.length() > 0)
return null;
org.thdl.tib.text.TibetanDocument tdoc
@ -7207,8 +7207,12 @@ tstHelper("ZUR");
}
private static void shelp(String s, String expectedErrors, String expectedScan) {
shelp(s, expectedErrors, expectedScan, "All");
}
private static void shelp(String s, String expectedErrors, String expectedScan, String warningLevel) {
StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false);
ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false, warningLevel);
if (null != expectedScan) {
if (!al.toString().equals(expectedScan)) {
System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:");
@ -7346,7 +7350,15 @@ tstHelper("ZUR");
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]");
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]", "Some");
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}]", "None");
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.504", "All");
ErrorsAndWarnings.setupSeverityMap();
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}]", "Most");
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]", "All");
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.504", "Some"); // back to the default value
ErrorsAndWarnings.setupSeverityMap();
shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]");
shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]");