Jskad's converter now has ACIP-to-Unicode built in. There are known

bugs; it is pre-alpha.  It's usable, though, and finds tons of errors
in ACIP input files, with the user deciding just how pedantic to be.
The biggest outstanding bug is the silent one: treating { }, space, as
tsheg instead of whitespace when we ought to know better.
This commit is contained in:
dchandler 2003-08-24 06:40:53 +00:00
parent d5ad760230
commit 1982c5847b
11 changed files with 355 additions and 244 deletions

View file

@ -38,24 +38,23 @@ public class ACIPConverter {
ThdlOptions.setUserPreference("thdl.debug", true);
}
// DLC NOW: (KA)'s info is lost when you convert to Unicode text instead of Unicode RTF. Give an ERROR.
/** Command-line converter. Gives error messages on standard
* output about why we can't convert the document perfectly and
* exits with non-zero return code, or is silent otherwise and
* exits with code zero. <p>FIXME: not so efficient; copies the
* whole file into memory first. */
public static void main(String[] args)
throws IOException // DLC FIXME: give nice error messages
throws IOException
{
boolean verbose = true;
boolean strict = true;
if (args.length != 2
|| (!(strict = "--strict".equals(args[0])) && !"--lenient".equals(args[0]))) {
System.err.println("Bad args! Need '--strict filename' or '--lenient filename'.");
System.exit(1);
if (args.length != 1) {
System.out.println("Bad args! Need just the name of the ACIP text file.");
}
StringBuffer errors = new StringBuffer();
int maxErrors = 250;
ArrayList al = ACIPTshegBarScanner.scanFile(args[1], errors, strict, maxErrors - 1);
ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1);
if (null == al) {
System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
@ -69,7 +68,7 @@ public class ACIPConverter {
System.err.println("Exiting with " + maxErrors + " or more lexical errors; please fix input file and try again.");
System.exit(1);
}
final boolean abortUponScanningError = false; // DLC MAKE ME CONFIGURABLE
final boolean abortUponScanningError = false;
// DLC NOW: BAo isn't converting.
if (errors.length() > 0) {
System.err.println("Errors scanning ACIP input file: ");
@ -80,10 +79,15 @@ public class ACIPConverter {
}
}
StringBuffer warnings = new StringBuffer();
boolean putWarningsInOutput = true; // DLC make me configurable.
String warningLevel = "Most"; // DLC make me configurable.
StringBuffer warnings = null;
boolean putWarningsInOutput = false;
if ("None" != warningLevel) {
warnings = new StringBuffer();
putWarningsInOutput = true;
}
convertToUnicode(al, System.out, errors, warnings,
putWarningsInOutput);
putWarningsInOutput, warningLevel);
if (errors.length() > 0) {
System.err.println("Errors converting ACIP input file: ");
System.err.println(errors);
@ -91,14 +95,14 @@ public class ACIPConverter {
System.err.println("Exiting; please fix input file and try again.");
System.exit(2);
}
if (warnings.length() > 0) {
if (null != warnings && warnings.length() > 0) {
System.err.println("Warnings converting ACIP input file: ");
System.err.println(warnings);
if (putWarningsInOutput)
System.err.println("The output contains these warnings.");
System.exit(2);
}
if (verbose) System.err.println("Converted " + args[1] + " perfectly.");
if (verbose) System.err.println("Converted " + args[0] + " perfectly.");
System.exit(0);
}
@ -131,16 +135,17 @@ public class ACIPConverter {
public static String convertToUnicode(String acip,
StringBuffer errors,
StringBuffer warnings,
boolean writeWarningsToResult) {
boolean writeWarningsToResult,
String warningLevel) {
ByteArrayOutputStream sw = new ByteArrayOutputStream();
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, true /* DLC FIXME */, -1);
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1);
try {
if (null != al
&& convertToUnicode(al, sw, errors,
warnings, writeWarningsToResult)) {
warnings, writeWarningsToResult,
warningLevel)) {
return sw.toString("UTF-8");
} else {
System.out.println("DLC al is " + al + " and convertToUnicode returned null.");
return null;
}
} catch (Exception e) {
@ -151,8 +156,8 @@ public class ACIPConverter {
/** Writes Unicode to out. If errors occur in converting a tsheg
* bar, then they are appended to errors if errors is non-null.
* Furthermore, errors are written to out. If writeWarningsToOut
* is true, then warnings also will be written to out. Returns
* true upon perfect success, false if errors occurred.
* is true, then warnings also will be written to out.
* @return true upon perfect success, false if errors occurred.
* @param scan result of ACIPTshegBarScanner.scan(..)
* @param out stream to which to write converted text
* @param errors if non-null, all error messages are appended
@ -166,7 +171,8 @@ public class ACIPConverter {
OutputStream out,
StringBuffer errors,
StringBuffer warnings,
boolean writeWarningsToOut)
boolean writeWarningsToOut,
String warningLevel)
throws IOException
{
int sz = scan.size();
@ -181,8 +187,18 @@ public class ACIPConverter {
writer.write("[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: ");
writer.write(s.getText());
writer.write("]");
} else if (stype == ACIPString.WARNING) {
if (writeWarningsToOut) {
writer.write("[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: ");
writer.write(s.getText());
writer.write("]");
}
if (null != warnings) {
warnings.append("Warning: Lexical warning: ");
warnings.append(s.getText());
warnings.append('\n');
}
} else {
// DLC FIXME: what about 'no A on root stack' and 'no A on such-and-such stack' warnings?
if (s.isLatin(stype)) {
if (stype == ACIPString.FOLIO_MARKER)
writer.write("{");
@ -219,7 +235,7 @@ public class ACIPConverter {
errors.append(errorMessage + "\n");
} else {
String warning
= pt.getWarning(false, // DLC: make me configurable
= pt.getWarning(warningLevel,
pl,
s.getText());
if (null != warning) {
@ -234,7 +250,7 @@ public class ACIPConverter {
}
}
unicode = sl.getUnicode();
if (null == unicode) throw new Error("DLC: HOW?");
if (null == unicode) throw new Error("FIXME: make this an assertion");
}
}
}
@ -245,7 +261,7 @@ public class ACIPConverter {
unicode = "\u0F3D";
else
unicode = ACIPRules.getUnicodeFor(s.getText(), false);
if (null == unicode) throw new Error("DLC: HOW?");
if (null == unicode) throw new Error("FIXME: make this an assertion");
}
if (null != unicode) {
writer.write(unicode);