I don't think warning level "None" was really doing the trick. Fixed that.
You can now customize the severities of all warnings, even 504 and 510. When the warning level is "None", scanning (i.e., lexical analysis) is faster.
This commit is contained in:
parent
e2d42f36eb
commit
1a055f3472
7 changed files with 68 additions and 33 deletions
|
@ -149,8 +149,7 @@ thdl.do.not.fix.rtf.hex.escapes = false
|
||||||
# see warning 501 even at the "Some" level, just change the option
|
# see warning 501 even at the "Some" level, just change the option
|
||||||
# thdl.acip.to.tibetan.warning.severity.501 to Some. You cannot make
|
# thdl.acip.to.tibetan.warning.severity.501 to Some. You cannot make
|
||||||
# a warning into an error, and you cannot make an error into a
|
# a warning into an error, and you cannot make an error into a
|
||||||
# warning. 504 and 510 cannot be downgraded; they are always
|
# warning.
|
||||||
# "Some"-level.
|
|
||||||
thdl.acip.to.tibetan.warning.severity.501 = Most
|
thdl.acip.to.tibetan.warning.severity.501 = Most
|
||||||
thdl.acip.to.tibetan.warning.severity.502 = All
|
thdl.acip.to.tibetan.warning.severity.502 = All
|
||||||
thdl.acip.to.tibetan.warning.severity.503 = All
|
thdl.acip.to.tibetan.warning.severity.503 = All
|
||||||
|
|
|
@ -292,7 +292,8 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
= ACIPTshegBarScanner.scanStream(in, null,
|
= ACIPTshegBarScanner.scanStream(in, null,
|
||||||
ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have",
|
ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have",
|
||||||
1000 - 1),
|
1000 - 1),
|
||||||
shortMessages);
|
shortMessages,
|
||||||
|
warningLevel);
|
||||||
if (null == al)
|
if (null == al)
|
||||||
return 47;
|
return 47;
|
||||||
boolean embeddedWarnings = (warningLevel != "None");
|
boolean embeddedWarnings = (warningLevel != "None");
|
||||||
|
|
|
@ -333,14 +333,15 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
throws InvalidACIPException
|
throws InvalidACIPException
|
||||||
{
|
{
|
||||||
StringBuffer errors = new StringBuffer();
|
StringBuffer errors = new StringBuffer();
|
||||||
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false);
|
String warningLevel = withWarnings ? "All" : "None";
|
||||||
|
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false,
|
||||||
|
warningLevel);
|
||||||
if (null == al || errors.length() > 0) {
|
if (null == al || errors.length() > 0) {
|
||||||
if (errors.length() > 0)
|
if (errors.length() > 0)
|
||||||
throw new InvalidACIPException(errors.toString());
|
throw new InvalidACIPException(errors.toString());
|
||||||
else
|
else
|
||||||
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
|
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
|
||||||
}
|
}
|
||||||
String warningLevel = withWarnings ? "All" : "None";
|
|
||||||
boolean colors = withWarnings;
|
boolean colors = withWarnings;
|
||||||
boolean putWarningsInOutput = false;
|
boolean putWarningsInOutput = false;
|
||||||
if ("None" != warningLevel) {
|
if ("None" != warningLevel) {
|
||||||
|
|
|
@ -66,7 +66,11 @@ public class ACIPConverter {
|
||||||
StringBuffer errors = new StringBuffer();
|
StringBuffer errors = new StringBuffer();
|
||||||
int maxErrors = 1000; // FIXME: make this PER CAPITA or else large ACIP Tibetan files are not converted for fear that they are English
|
int maxErrors = 1000; // FIXME: make this PER CAPITA or else large ACIP Tibetan files are not converted for fear that they are English
|
||||||
boolean shortMessages = false;
|
boolean shortMessages = false;
|
||||||
ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1, shortMessages);
|
String warningLevel = "Most";
|
||||||
|
ArrayList al
|
||||||
|
= ACIPTshegBarScanner.scanFile(args[0], errors,
|
||||||
|
maxErrors - 1, shortMessages,
|
||||||
|
warningLevel);
|
||||||
|
|
||||||
if (null == al) {
|
if (null == al) {
|
||||||
System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
|
System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
|
||||||
|
@ -90,7 +94,6 @@ public class ACIPConverter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String warningLevel = "Most";
|
|
||||||
boolean colors = true;
|
boolean colors = true;
|
||||||
StringBuffer warnings = null;
|
StringBuffer warnings = null;
|
||||||
boolean putWarningsInOutput = false;
|
boolean putWarningsInOutput = false;
|
||||||
|
@ -200,7 +203,8 @@ public class ACIPConverter {
|
||||||
String warningLevel,
|
String warningLevel,
|
||||||
boolean shortMessages) {
|
boolean shortMessages) {
|
||||||
ByteArrayOutputStream sw = new ByteArrayOutputStream();
|
ByteArrayOutputStream sw = new ByteArrayOutputStream();
|
||||||
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages);
|
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages,
|
||||||
|
warningLevel);
|
||||||
try {
|
try {
|
||||||
if (null != al) {
|
if (null != al) {
|
||||||
convertToUnicodeText(al, sw, errors,
|
convertToUnicodeText(al, sw, errors,
|
||||||
|
|
|
@ -51,7 +51,8 @@ public class ACIPTshegBarScanner {
|
||||||
StringBuffer errors = new StringBuffer();
|
StringBuffer errors = new StringBuffer();
|
||||||
int maxErrors = 1000;
|
int maxErrors = 1000;
|
||||||
ArrayList al = scanFile(args[0], errors, maxErrors - 1,
|
ArrayList al = scanFile(args[0], errors, maxErrors - 1,
|
||||||
"true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages")));
|
"true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages")),
|
||||||
|
"All" /* memory hog */);
|
||||||
|
|
||||||
if (null == al) {
|
if (null == al) {
|
||||||
System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
|
System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
|
||||||
|
@ -82,29 +83,38 @@ public class ACIPTshegBarScanner {
|
||||||
* <p>FIXME: not so efficient; copies the whole file into memory
|
* <p>FIXME: not so efficient; copies the whole file into memory
|
||||||
* first.
|
* first.
|
||||||
*
|
*
|
||||||
|
* @param warningLevel controls which lexical warnings you will
|
||||||
|
* encounter
|
||||||
|
*
|
||||||
* @throws IOException if we cannot read in the ACIP input file
|
* @throws IOException if we cannot read in the ACIP input file
|
||||||
* */
|
* */
|
||||||
public static ArrayList scanFile(String fname, StringBuffer errors,
|
public static ArrayList scanFile(String fname, StringBuffer errors,
|
||||||
int maxErrors, boolean shortMessages)
|
int maxErrors, boolean shortMessages,
|
||||||
|
String warningLevel)
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
return scanStream(new FileInputStream(fname),
|
return scanStream(new FileInputStream(fname),
|
||||||
errors, maxErrors, shortMessages);
|
errors, maxErrors, shortMessages, warningLevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Scans a stream of ACIP into tsheg bars. If errors is
|
/** Scans a stream of ACIP into tsheg bars. If errors is
|
||||||
* non-null, error messages will be appended to it. You can
|
* non-null, error messages will be appended to it. You can
|
||||||
* recover both errors and warnings (modulo offset information)
|
* recover both errors and (optionally) warnings (modulo offset
|
||||||
* from the result, though. They will be short messages iff
|
* information) from the result, though. They will be short
|
||||||
* shortMessages is true. Returns a list of TStrings that is the
|
* messages iff shortMessages is true. Returns a list of
|
||||||
* scan, or null if more than maxErrors occur.
|
* TStrings that is the scan, or null if more than maxErrors
|
||||||
|
* occur.
|
||||||
*
|
*
|
||||||
* <p>FIXME: not so efficient; copies the whole file into memory
|
* <p>FIXME: not so efficient; copies the whole file into memory
|
||||||
* first.
|
* first.
|
||||||
*
|
*
|
||||||
|
* @param warningLevel controls which lexical warnings you will
|
||||||
|
* encounter
|
||||||
|
*
|
||||||
* @throws IOException if we cannot read the whole ACIP stream */
|
* @throws IOException if we cannot read the whole ACIP stream */
|
||||||
public static ArrayList scanStream(InputStream stream, StringBuffer errors,
|
public static ArrayList scanStream(InputStream stream, StringBuffer errors,
|
||||||
int maxErrors, boolean shortMessages)
|
int maxErrors, boolean shortMessages,
|
||||||
|
String warningLevel)
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
StringBuffer s = new StringBuffer();
|
StringBuffer s = new StringBuffer();
|
||||||
|
@ -117,7 +127,8 @@ public class ACIPTshegBarScanner {
|
||||||
s.append(ch, 0, amt);
|
s.append(ch, 0, amt);
|
||||||
}
|
}
|
||||||
in.close();
|
in.close();
|
||||||
return scan(s.toString(), errors, maxErrors, shortMessages);
|
return scan(s.toString(), errors, maxErrors, shortMessages,
|
||||||
|
warningLevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the
|
/** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the
|
||||||
|
@ -196,7 +207,7 @@ public class ACIPTshegBarScanner {
|
||||||
* @return null if more than maxErrors errors occur, or the scan
|
* @return null if more than maxErrors errors occur, or the scan
|
||||||
* otherwise */
|
* otherwise */
|
||||||
public static ArrayList scan(String s, StringBuffer errors, int maxErrors,
|
public static ArrayList scan(String s, StringBuffer errors, int maxErrors,
|
||||||
boolean shortMessages) {
|
boolean shortMessages, String warningLevel) {
|
||||||
// FIXME: Use less memory and time by not adding in the
|
// FIXME: Use less memory and time by not adding in the
|
||||||
// warnings that are below threshold.
|
// warnings that are below threshold.
|
||||||
|
|
||||||
|
@ -787,11 +798,12 @@ public class ACIPTshegBarScanner {
|
||||||
// or ., or [A-Za-z] follows '.'.
|
// or ., or [A-Za-z] follows '.'.
|
||||||
al.add(new TString("ACIP", s.substring(i, i+1),
|
al.add(new TString("ACIP", s.substring(i, i+1),
|
||||||
TString.TIBETAN_PUNCTUATION));
|
TString.TIBETAN_PUNCTUATION));
|
||||||
if (!(i + 1 < sl
|
if (ErrorsAndWarnings.isEnabled(510, warningLevel)
|
||||||
|
&& (!(i + 1 < sl
|
||||||
&& (s.charAt(i+1) == '.' || s.charAt(i+1) == ','
|
&& (s.charAt(i+1) == '.' || s.charAt(i+1) == ','
|
||||||
|| (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n')
|
|| (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n')
|
||||||
|| (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z')
|
|| (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z')
|
||||||
|| (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) {
|
|| (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z'))))) {
|
||||||
al.add(new TString("ACIP",
|
al.add(new TString("ACIP",
|
||||||
ErrorsAndWarnings.getMessage(510,
|
ErrorsAndWarnings.getMessage(510,
|
||||||
shortMessages,
|
shortMessages,
|
||||||
|
@ -900,7 +912,8 @@ public class ACIPTshegBarScanner {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ('%' == ch) {
|
if ('%' == ch
|
||||||
|
&& ErrorsAndWarnings.isEnabled(504, warningLevel)) {
|
||||||
al.add(new TString("ACIP",
|
al.add(new TString("ACIP",
|
||||||
ErrorsAndWarnings.getMessage(504,
|
ErrorsAndWarnings.getMessage(504,
|
||||||
shortMessages,
|
shortMessages,
|
||||||
|
|
|
@ -46,15 +46,16 @@ public class ErrorsAndWarnings {
|
||||||
private static HashMap severityMap = new HashMap();
|
private static HashMap severityMap = new HashMap();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
setupSeverityMapFromBuiltinDefaults();
|
setupSeverityMap();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns higher numbers for higher severity. */
|
/** Returns higher numbers for higher severity. */
|
||||||
private static int severityStringToInteger(String sev) {
|
private static int severityStringToInteger(String sev) {
|
||||||
if (sev == "ERROR") return Integer.MAX_VALUE;
|
if (sev == "ERROR") return Integer.MAX_VALUE;
|
||||||
if (sev == "Some") return Integer.MAX_VALUE - 1;
|
if (sev == "None") return Integer.MAX_VALUE - 1;
|
||||||
if (sev == "Most") return Integer.MAX_VALUE - 2;
|
if (sev == "Some") return Integer.MAX_VALUE - 2;
|
||||||
if (sev == "All") return Integer.MAX_VALUE - 3;
|
if (sev == "Most") return Integer.MAX_VALUE - 3;
|
||||||
|
if (sev == "All") return Integer.MAX_VALUE - 4;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/** Returns true if and only if sev1 is at least as severe as
|
/** Returns true if and only if sev1 is at least as severe as
|
||||||
|
@ -306,7 +307,12 @@ public class ErrorsAndWarnings {
|
||||||
private static final int MIN_WARNING = 501; // inclusive
|
private static final int MIN_WARNING = 501; // inclusive
|
||||||
private static final int MAX_WARNING = 511; // inclusive
|
private static final int MAX_WARNING = 511; // inclusive
|
||||||
|
|
||||||
private static void setupSeverityMapFromBuiltinDefaults() {
|
/** Call this ONLY when testing unless you think hard about it.
|
||||||
|
Reinitializes the severities of all warnings and errors using
|
||||||
|
user preferences and falling back on built-in defaults if
|
||||||
|
necessary (which it shouldn't be -- options.txt should be in
|
||||||
|
the JAR with this class file). */
|
||||||
|
static void setupSeverityMap() {
|
||||||
// errors:
|
// errors:
|
||||||
for (int i = MIN_ERROR; i <= MAX_ERROR; i++) {
|
for (int i = MIN_ERROR; i <= MAX_ERROR; i++) {
|
||||||
severityMap.put(new Integer(i), "ERROR");
|
severityMap.put(new Integer(i), "ERROR");
|
||||||
|
@ -356,7 +362,6 @@ public class ErrorsAndWarnings {
|
||||||
}
|
}
|
||||||
|
|
||||||
// DLC FIXME: make 506 an error? or a new, super-high priority class of warning?
|
// DLC FIXME: make 506 an error? or a new, super-high priority class of warning?
|
||||||
// DLC FIXME: you can't turn 504 or 510 down (e.g., to an "All"-level warning)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Prints out the long forms of the error messages, which will
|
/** Prints out the long forms of the error messages, which will
|
||||||
|
|
|
@ -59,7 +59,7 @@ public class PackageTest extends TestCase {
|
||||||
which may be an error message. */
|
which may be an error message. */
|
||||||
static String ACIP2TMW2ACIP(String ACIP) {
|
static String ACIP2TMW2ACIP(String ACIP) {
|
||||||
StringBuffer errors = new StringBuffer();
|
StringBuffer errors = new StringBuffer();
|
||||||
ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false);
|
ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false, "None");
|
||||||
if (null == al || errors.length() > 0)
|
if (null == al || errors.length() > 0)
|
||||||
return null;
|
return null;
|
||||||
org.thdl.tib.text.TibetanDocument tdoc
|
org.thdl.tib.text.TibetanDocument tdoc
|
||||||
|
@ -7207,8 +7207,12 @@ tstHelper("ZUR");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void shelp(String s, String expectedErrors, String expectedScan) {
|
private static void shelp(String s, String expectedErrors, String expectedScan) {
|
||||||
|
shelp(s, expectedErrors, expectedScan, "All");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void shelp(String s, String expectedErrors, String expectedScan, String warningLevel) {
|
||||||
StringBuffer errors = new StringBuffer();
|
StringBuffer errors = new StringBuffer();
|
||||||
ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false);
|
ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false, warningLevel);
|
||||||
if (null != expectedScan) {
|
if (null != expectedScan) {
|
||||||
if (!al.toString().equals(expectedScan)) {
|
if (!al.toString().equals(expectedScan)) {
|
||||||
System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:");
|
System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:");
|
||||||
|
@ -7346,7 +7350,15 @@ tstHelper("ZUR");
|
||||||
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]");
|
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]");
|
||||||
|
|
||||||
|
|
||||||
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
|
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]", "Some");
|
||||||
|
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}]", "None");
|
||||||
|
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.504", "All");
|
||||||
|
ErrorsAndWarnings.setupSeverityMap();
|
||||||
|
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}]", "Most");
|
||||||
|
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]", "All");
|
||||||
|
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.severity.504", "Some"); // back to the default value
|
||||||
|
ErrorsAndWarnings.setupSeverityMap();
|
||||||
|
|
||||||
shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]");
|
shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]");
|
||||||
shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]");
|
shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]");
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue