Jskad's converter now has ACIP-to-Unicode built in. There are known bugs; it is pre-alpha. It's usable, though, and finds tons of errors in ACIP input files, with the user deciding just how pedantic to be. The biggest outstanding bug is the silent one: treating { } (a space) as a tsheg instead of as whitespace when we ought to know better.
2003-08-24 06:40:53 +00:00 · 2003-08-24 06:40:53 +00:00 · 1982c5847b
commit 1982c5847b
parent d5ad760230
11 changed files with 355 additions and 244 deletions
--- a/source/org/thdl/tib/input/ConvertDialog.java
+++ b/source/org/thdl/tib/input/ConvertDialog.java
@ -46,11 +46,13 @@ class ConvertDialog extends JDialog

    JComboBox choices;

+    private JComboBox warningLevels;
+
    JTextField oldTextField, newTextField;

    JButton browseOld, browseNew, convert, cancel, openDocOld, openDocNew, about;

-    JLabel type, oldLabel, newLabel;
+    JLabel oldLabel, newLabel;

    String[] choiceNames;

@ -68,6 +70,12 @@ class ConvertDialog extends JDialog
            public void theRealActionPerformed(ActionEvent e) {
                ConvertDialog.this.theRealActionPerformed(e);
            }};
+    private void updateWarningLevels() {
+        if (choices.getSelectedItem() == ACIP_TO_UNI)
+            this.warningLevels.enable();
+        else
+            this.warningLevels.disable();
+    }
    private void init()
    {
        jfc = new JFileChooser(controller.getDefaultDirectory());
@ -76,9 +84,17 @@ class ConvertDialog extends JDialog

        content = new JPanel(new GridLayout(0,1));
        JPanel temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
-        type = new JLabel("Type of Conversion: ");
-        temp.add(type);
+        temp.add(new JLabel("Type of Conversion: "));
        temp.add(choices);
+        temp.add(Box.createHorizontalStrut(20));
+        temp.add(new JLabel("Warning Level: "));
+        this.warningLevels
+            = new JComboBox(new String[] { "None", "Some", "Most", "All" });
+        this.warningLevels.setSelectedItem("Most");
+        this.warningLevels.addActionListener(tal);
+        updateWarningLevels();
+
+        temp.add(warningLevels);
        content.add(temp);

        temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
@ -260,7 +276,8 @@ class ConvertDialog extends JDialog
                controller.doConversion(this,
                                        origFile,
                                        convertedFile,
-                                        (String)choices.getSelectedItem());
+                                        (String)choices.getSelectedItem(),
+                                        (String)warningLevels.getSelectedItem());
            } catch (OutOfMemoryError e) {
                JOptionPane.showMessageDialog(this,
                                              "The converter ran out of memory.  Please give the\nJVM more memory by using java -XmxYYYm where YYY\nis the amount of memory your system has, or\nsomething close to it.  E.g., try\n'java -Xmx512m -jar Jskad.jar'.",
@ -316,7 +333,11 @@ class ConvertDialog extends JDialog
                                          "About",
                                          JOptionPane.PLAIN_MESSAGE);
        } else if (cmd.equals("comboBoxChanged")) {
+            JComboBox src = (JComboBox)ae.getSource();
+            if (src == choices) {
                updateNewFileGuess();
+                updateWarningLevels();
+            }
        }
    }

@ -400,7 +421,7 @@ class ConvertDialog extends JDialog
        } else { // conversion {to Wylie or TM} mode
            if (TMW_TO_WYLIE == ct) {
                newFileNamePrefix = suggested_WYLIE_prefix;
-            } else if (TMW_TO_UNI == ct) {
+            } else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) {
                newFileNamePrefix = suggested_TO_UNI_prefix;
            } else if (TM_TO_TMW == ct) {
                newFileNamePrefix = suggested_TO_TMW_prefix;
--- a/source/org/thdl/tib/input/ConverterGUI.java
+++ b/source/org/thdl/tib/input/ConverterGUI.java
@ -48,14 +48,15 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
    }

    public boolean doConversion(ConvertDialog cd, File oldFile, File newFile,
-                                String whichConversion) {
+                                String whichConversion, String warningLevel) {
        PrintStream ps;
        try {
            returnCode
                = TibetanConverter.reallyConvert(new FileInputStream(oldFile),
                                                 ps = new PrintStream(new FileOutputStream(newFile),
                                                                      false),
-                                                 whichConversion);
+                                                 whichConversion,
+                                                 warningLevel);
            ps.close();
        } catch (FileNotFoundException e) {
            returnCode = 39;
@ -89,6 +90,28 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
                                          "Errors in Conversion",
                                          JOptionPane.ERROR_MESSAGE);
            return false;
+        } else if (45 == returnCode) {
+            if (warningLevel == "None") throw new Error("FIXME: make this an assertion");
+            JOptionPane.showMessageDialog(cd,
+                                          "No errors occurred, but some warnings are embedded in\nthe output as [#WARNING...].",
+                                          "Warnings in Conversion",
+                                          JOptionPane.ERROR_MESSAGE);
+            return false;
+        } else if (46 == returnCode) {
+            JOptionPane.showMessageDialog(cd,
+                                          "Errors occurred, and are embedded in the output\nas [#ERROR...]."
+                                          + ((warningLevel == "None")
+                                             ? ""
+                                             : "  Warnings may have occurred; if so,\nthey are embedded in the output as [#WARNING...]."),
+                                          "Errors in Conversion",
+                                          JOptionPane.ERROR_MESSAGE);
+            return false;
+        } else if (47 == returnCode) {
+            JOptionPane.showMessageDialog(cd,
+                                          "So many errors occurred that the document is likely\nEnglish, not Tibetan.  No output was produced.",
+                                          "Many Errors in Conversion",
+                                          JOptionPane.ERROR_MESSAGE);
+            return false;
        } else if (1 == returnCode) {
            if (FIND_SOME_NON_TMW == whichConversion
                || FIND_ALL_NON_TMW == whichConversion) {
@ -102,6 +125,8 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
                                              "Something besides TibetanMachine was found; see output file.",
                                              "Not entirely TM",
                                              JOptionPane.PLAIN_MESSAGE);
+            } else {
+                throw new Error("Who returned this??");
            }
            return false;
        } else if (0 != returnCode) {
@ -150,6 +175,7 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
        try {
            final ConvertDialog convDialog;
            String[] choices = new String[]{
+                ACIP_TO_UNI,
                TM_TO_TMW,
                TMW_TO_UNI,
                TMW_TO_WYLIE,
--- a/source/org/thdl/tib/input/DuffPaneTest.java
+++ b/source/org/thdl/tib/input/DuffPaneTest.java
@ -372,6 +372,9 @@ public class DuffPaneTest extends TestCase {
        ensureKeysGiveCorrectWylie("bskyUMbs");
        ensureKeysGiveCorrectWylie("bskyUMbsHgro ");

+        ensureKeysGiveCorrectWylie("gyurd", "gyurda");
+        ensureKeysGiveCorrectWylie("gyur.d");
+
        ensureKeysGiveCorrectWylie("favakakhagangacachajanyatathadanapaphabamatsatshadzawazhaza'ayaralashasahaTaThaDaNaSha");
        ensureKeysGiveCorrectWylie("fevekekhegengecechejenyetethedenepephebemetsetshedzewezheze'eyerelesheseheTeTheDeNeShe");
        ensureKeysGiveCorrectWylie("fuvukukhugungucuchujunyututhudunupuphubumutsutshudzuwuzhuzu'uyurulushusuhuTuThuDuNuShu");
--- a/source/org/thdl/tib/input/FontConversion.java
+++ b/source/org/thdl/tib/input/FontConversion.java
@ -37,5 +37,6 @@ interface FontConversion
        whichConversion, which must be one of the known conversions.
        @return true on success, false otherwise */
    boolean doConversion(ConvertDialog cd, File oldFile,
-                         File newFile, String whichConversion);
+                         File newFile, String whichConversion,
+                         String warningLevel);
 }
--- a/source/org/thdl/tib/input/FontConverterConstants.java
+++ b/source/org/thdl/tib/input/FontConverterConstants.java
@ -26,6 +26,7 @@ import java.awt.*;
    @author Nathaniel Garson, Tibetan and Himalayan Digital Library */
 interface FontConverterConstants
 {
+    final String ACIP_TO_UNI = "ACIP to Unicode";
    final String TM_TO_TMW = "TM to TMW";
    final String TMW_TO_UNI = "TMW to Unicode";
    final String TMW_TO_WYLIE = "TMW to Wylie";
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@ -26,6 +26,10 @@ import javax.swing.text.StyleConstants;
 import org.thdl.util.*;
 import org.thdl.tib.text.*;

+import org.thdl.tib.text.ttt.ACIPConverter;
+import org.thdl.tib.text.ttt.ACIPTshegBarScanner;
+import java.util.ArrayList;
+
 /** TibetanConverter is a command-line utility for converting to
 *  and from Tibetan Machine Web (TMW).  It converts TMW to Wylie, to
 *  Unicode, or to Tibetan Machine (TM).  It also converts TM to TMW.
@ -66,6 +70,7 @@ public class TibetanConverter implements FontConverterConstants {
        try {
            boolean convertToUnicodeMode = false;
            boolean convertToTMMode = false;
+            boolean convertACIPToUniMode = false;
            boolean convertToTMWMode = false;
            boolean convertToWylieMode = false;
            boolean findSomeNonTMWMode = false;
@ -84,6 +89,8 @@ public class TibetanConverter implements FontConverterConstants {
                             = args[0].equals("--to-tibetan-machine"))
                         || (convertToTMWMode
                             = args[0].equals("--to-tibetan-machine-web"))
+                         || (convertACIPToUniMode
+                             = args[0].equals("--acip-to-unicode"))
                         || (convertToUnicodeMode
                             = args[0].equals("--to-unicode"))
                         || (convertToWylieMode
@ -98,6 +105,7 @@ public class TibetanConverter implements FontConverterConstants {
                out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw");
                out.println("                  | --to-tibetan-machine | --to-tibetan-machine-web");
                out.println("                  | --to-unicode | --to-wylie] RTF_file");
+                out.println(" | TibetanConverter --acip-to-unicode TXT_file");
                out.println(" | TibetanConverter [--version | -v | --help | -h]");
                out.println("");
                out.println("Distributed under the terms of the THDL Open Community License Version 1.0.");
@ -105,6 +113,11 @@ public class TibetanConverter implements FontConverterConstants {
                out.println("Usage:");
                out.println(" -v | --version for version info");
                out.println(" -h | --help for this message");
+                out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine");
+                out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
+                out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb");
+                out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie");
+                out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file");
                out.println(" --find-all-non-tmw to locate all characters in the input document that are");
                out.println("   not in Tibetan Machine Web fonts, exit zero if and only if none found");
                out.println(" --find-some-non-tmw to locate all distinct characters in the input document");
@ -113,14 +126,12 @@ public class TibetanConverter implements FontConverterConstants {
                out.println("   not in Tibetan Machine fonts, exit zero if and only if none found");
                out.println(" --find-some-non-tm to locate all distinct characters in the input document");
                out.println("   not in Tibetan Machine fonts, exit zero if and only if none found");
-                out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine");
-                out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
-                out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb");
-                out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie");
                out.println("");
-                out.println(" In --to... modes, needs one argument, the name of the TibetanMachineWeb RTF");
+                out.println(" In --to... and --acip-to... modes, needs one argument, the name of the");
+                out.println(" TibetanMachineWeb RTF");
                out.println(" file (for --to-wylie, --to-unicode, and --to-tibetan-machine) or the name of");
-                out.println(" the TibetanMachine RTF file (for --to-tibetan-machine-web).  Writes the");
+                out.println(" the TibetanMachine RTF file (for --to-tibetan-machine-web) or the name of the");
+                out.println(" ACIP text file (for --acip-to-unicode).  Writes the");
                out.println(" result to standard output (after dealing with the curly brace problem if");
                out.println(" the input is TibetanMachineWeb).  Exit code is zero on success, 42 if some");
                out.println(" glyphs couldn't be converted (in which case the output is just those glyphs),");
@ -135,11 +146,10 @@ public class TibetanConverter implements FontConverterConstants {
                out.println(" You may find it helpful to use `--find-some-non-tmw' mode (or");
                out.println(" `--find-some-non-tm' mode for Tibetan Machine input) before doing a");
                out.println(" conversion so that you have confidence in the conversion's correctness.");
-                // DLC add Wylie->TMW mode.
                return 77;
            }
            if (args[0].equals("--version") || args[0].equals("-v")) {
-                out.println("TibetanConverter version 0.82");
+                out.println("TibetanConverter version 0.83");
                out.println("Compiled at "
                            + ThdlVersion.getTimeOfCompilation());
                return 77;
@ -168,12 +178,15 @@ public class TibetanConverter implements FontConverterConstants {
                    conversionTag = TMW_TO_UNI;
                } else if (convertToTMWMode) {
                    conversionTag = TM_TO_TMW;
+                } else if (convertACIPToUniMode) {
+                    conversionTag = ACIP_TO_UNI;
                } else {
                    ThdlDebug.verify(convertToTMMode);
                    conversionTag = TMW_TO_TM;
                }
            }
-            return reallyConvert(in, out, conversionTag);
+            return reallyConvert(in, out, conversionTag, "Most" // DLC make me configurable
+                                 );
        } catch (ThdlLazyException e) {
            out.println("TibetanConverter has a BUG:");
            e.getRealException().printStackTrace(out);
@ -190,7 +203,29 @@ public class TibetanConverter implements FontConverterConstants {
        number of strings -- see the code.  Returns an appropriate
        return code so that TibetanConverter's usage message is
        honored. */
-    static int reallyConvert(InputStream in, PrintStream out, String ct) {
+    static int reallyConvert(InputStream in, PrintStream out, String ct,
+                             String warningLevel) {
+        if (ACIP_TO_UNI == ct) {
+            try {
+                ArrayList al = ACIPTshegBarScanner.scanStream(in, null,
+                                                              250 - 1 // DLC FIXME: make me configurable
+                                                              );
+                if (null == al)
+                    return 47;
+                StringBuffer warnings = new StringBuffer();
+                boolean embeddedWarnings = (warningLevel != "None");
+                if (!ACIPConverter.convertToUnicode(al, out, null, warnings,
+                                                    embeddedWarnings,
+                                                    warningLevel))
+                    return 46;
+                if (embeddedWarnings && warnings.length() > 0)
+                    return 45;
+                else
+                    return 0;
+            } catch (IOException e) {
+                return 48;
+            }
+        } else {
            TibetanDocument tdoc = new TibetanDocument();
            {
                SimpleAttributeSet ras = new SimpleAttributeSet();
@ -318,4 +353,5 @@ public class TibetanConverter implements FontConverterConstants {
                return exitCode;
            }
        }
+    }
 }
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@ -38,24 +38,23 @@ public class ACIPConverter {
        ThdlOptions.setUserPreference("thdl.debug", true);
    }

+    // DLC NOW: (KA)'s info is lost when you convert to Unicode text instead of Unicode RTF.  Give an ERROR.
+
    /** Command-line converter.  Gives error messages on standard
     *  output about why we can't convert the document perfectly and
     *  exits with non-zero return code, or is silent otherwise and
     *  exits with code zero.  <p>FIXME: not so efficient; copies the
     *  whole file into memory first. */
    public static void main(String[] args)
-        throws IOException // DLC FIXME: give nice error messages
+        throws IOException
    {
        boolean verbose = true;
-        boolean strict = true;
-        if (args.length != 2
-            || (!(strict = "--strict".equals(args[0])) && !"--lenient".equals(args[0]))) {
-            System.err.println("Bad args!  Need '--strict filename' or '--lenient filename'.");
-            System.exit(1);
+        if (args.length != 1) {
+            System.out.println("Bad args!  Need just the name of the ACIP text file.");
        }
        StringBuffer errors = new StringBuffer();
        int maxErrors = 250;
-        ArrayList al = ACIPTshegBarScanner.scanFile(args[1], errors, strict, maxErrors - 1);
+        ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1);

        if (null == al) {
            System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
@ -69,7 +68,7 @@ public class ACIPConverter {
            System.err.println("Exiting with " + maxErrors + " or more lexical errors; please fix input file and try again.");
            System.exit(1);
        }
-        final boolean abortUponScanningError = false; // DLC MAKE ME CONFIGURABLE
+        final boolean abortUponScanningError = false;
        // DLC NOW: BAo isn't converting.
        if (errors.length() > 0) {
            System.err.println("Errors scanning ACIP input file: ");
@ -80,10 +79,15 @@ public class ACIPConverter {
            }
        }

-        StringBuffer warnings = new StringBuffer();
-        boolean putWarningsInOutput = true; // DLC make me configurable.
+        String warningLevel = "Most"; // DLC make me configurable.
+        StringBuffer warnings = null;
+        boolean putWarningsInOutput = false;
+        if ("None" != warningLevel) {
+            warnings = new StringBuffer();
+            putWarningsInOutput = true;
+        }
        convertToUnicode(al, System.out, errors, warnings,
-                         putWarningsInOutput);
+                         putWarningsInOutput, warningLevel);
        if (errors.length() > 0) {
            System.err.println("Errors converting ACIP input file: ");
            System.err.println(errors);
@ -91,14 +95,14 @@ public class ACIPConverter {
            System.err.println("Exiting; please fix input file and try again.");
            System.exit(2);
        }
-        if (warnings.length() > 0) {
+        if (null != warnings && warnings.length() > 0) {
            System.err.println("Warnings converting ACIP input file: ");
            System.err.println(warnings);
            if (putWarningsInOutput)
                System.err.println("The output contains these warnings.");
            System.exit(2);
        }
-        if (verbose) System.err.println("Converted " + args[1] + " perfectly.");
+        if (verbose) System.err.println("Converted " + args[0] + " perfectly.");
        System.exit(0);
    }

@ -131,16 +135,17 @@ public class ACIPConverter {
    public static String convertToUnicode(String acip,
                                          StringBuffer errors,
                                          StringBuffer warnings,
-                                          boolean writeWarningsToResult) {
+                                          boolean writeWarningsToResult,
+                                          String warningLevel) {
        ByteArrayOutputStream sw = new ByteArrayOutputStream();
-        ArrayList al = ACIPTshegBarScanner.scan(acip, errors, true /* DLC FIXME */, -1);
+        ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1);
        try {
            if (null != al
                && convertToUnicode(al, sw, errors,
-                                    warnings, writeWarningsToResult)) {
+                                    warnings, writeWarningsToResult,
+                                    warningLevel)) {
                return sw.toString("UTF-8");
            } else {
-                System.out.println("DLC al is " + al + " and convertToUnicode returned null.");
                return null;
            }
        } catch (Exception e) {
@ -151,8 +156,8 @@ public class ACIPConverter {
    /** Writes Unicode to out.  If errors occur in converting a tsheg
     *  bar, then they are appended to errors if errors is non-null.
     *  Furthermore, errors are written to out.  If writeWarningsToOut
-     *  is true, then warnings also will be written to out.  Returns
-     *  true upon perfect success, false if errors occurred.
+     *  is true, then warnings also will be written to out.
+     *  @return true upon perfect success, false if errors occurred.
     *  @param scan result of ACIPTshegBarScanner.scan(..)
     *  @param out stream to which to write converted text
     *  @param errors if non-null, all error messages are appended
@ -166,7 +171,8 @@ public class ACIPConverter {
                                           OutputStream out,
                                           StringBuffer errors,
                                           StringBuffer warnings,
-                                           boolean writeWarningsToOut)
+                                           boolean writeWarningsToOut,
+                                           String warningLevel)
        throws IOException
    {
        int sz = scan.size();
@ -181,8 +187,18 @@ public class ACIPConverter {
                writer.write("[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: ");
                writer.write(s.getText());
                writer.write("]");
+            } else if (stype == ACIPString.WARNING) {
+                if (writeWarningsToOut) {
+                    writer.write("[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: ");
+                    writer.write(s.getText());
+                    writer.write("]");
+                }
+                if (null != warnings) {
+                    warnings.append("Warning: Lexical warning: ");
+                    warnings.append(s.getText());
+                    warnings.append('\n');
+                }
            } else {
-                // DLC FIXME: what about 'no A on root stack' and 'no A on such-and-such stack' warnings?
                if (s.isLatin(stype)) {
                    if (stype == ACIPString.FOLIO_MARKER)
                        writer.write("{");
@ -219,7 +235,7 @@ public class ACIPConverter {
                                        errors.append(errorMessage + "\n");
                                } else {
                                    String warning
-                                        = pt.getWarning(false, // DLC: make me configurable
+                                        = pt.getWarning(warningLevel,
                                                        pl,
                                                        s.getText());
                                    if (null != warning) {
@ -234,7 +250,7 @@ public class ACIPConverter {
                                        }
                                    }
                                    unicode = sl.getUnicode();
-                                    if (null == unicode) throw new Error("DLC: HOW?");
+                                    if (null == unicode) throw new Error("FIXME: make this an assertion");
                                }
                            }
                        }
@ -245,7 +261,7 @@ public class ACIPConverter {
                            unicode = "\u0F3D";
                        else
                            unicode = ACIPRules.getUnicodeFor(s.getText(), false);
-                        if (null == unicode) throw new Error("DLC: HOW?");
+                        if (null == unicode) throw new Error("FIXME: make this an assertion");
                    }
                    if (null != unicode) {
                        writer.write(unicode);
--- a/source/org/thdl/tib/text/ttt/ACIPString.java
+++ b/source/org/thdl/tib/text/ttt/ACIPString.java
@ -75,9 +75,11 @@ public class ACIPString {
    public static final int START_PAREN = 15;
    /** For the closing ) in (NYA) */
    public static final int END_PAREN = 16;
+    /** For things that may not be legal syntax, such as {KA . KHA} */
+    public static final int WARNING = 17;
    /** For things that are not legal syntax, such as a file that
     * contains just "[# HALF A COMMEN" */
-    public static final int ERROR = 17;
+    public static final int ERROR = 18;

    /** Returns true if and only if this string is Latin (usually
     *  English).  Returns false if this string is transliteration of
@ -132,6 +134,7 @@ public class ACIPString {
        if (type == END_SLASH) typeString = "END_SLASH";
        if (type == START_PAREN) typeString = "START_PAREN";
        if (type == END_PAREN) typeString = "END_PAREN";
+        if (type == WARNING) typeString = "WARNING";
        if (type == ERROR) typeString = "ERROR";
        return typeString + ":{" + getText() + "}";
    }
--- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
@ -39,15 +39,13 @@ public class ACIPTshegBarScanner {
     *  with code zero.  <p>FIXME: not so efficient; copies the whole
     *  file into memory first. */
    public static void main(String[] args) throws IOException {
-        boolean strict = true;
-        if (args.length != 2
-            || (!(strict = "--strict".equals(args[0])) && !"--lenient".equals(args[0]))) {
-            System.out.println("Bad args!  Need '--strict filename' or '--lenient filename'.");
+        if (args.length != 1) {
+            System.out.println("Bad args!  Need just the name of the ACIP text file.");
            System.exit(1);
        }
        StringBuffer errors = new StringBuffer();
        int maxErrors = 250;
-        ArrayList al = scanFile(args[1], errors, strict, maxErrors - 1);
+        ArrayList al = scanFile(args[0], errors, maxErrors - 1);

        if (null == al) {
            System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
@ -70,27 +68,39 @@ public class ACIPTshegBarScanner {
    }

    /** Scans an ACIP file with path fname into tsheg bars.  If errors
-     *  is non-null, error messages will be appended to it.  If strict
-     *  is true, then you're more likely to see error
-     *  messages. Returns a list of ACIPStrings that is the
-     *  scan. <p>FIXME: not so efficient; copies the whole file into
-     *  memory first.
+     *  is non-null, error messages will be appended to it.  Returns a
+     *  list of ACIPStrings that is the scan. <p>FIXME: not so
+     *  efficient; copies the whole file into memory first.
     *  @throws IOException if we cannot read in the ACIP input file */
-    public static ArrayList scanFile(String fname, StringBuffer errors, boolean strict, int maxErrors)
+    public static ArrayList scanFile(String fname, StringBuffer errors, int maxErrors)
+        throws IOException
+    {
+        return scanStream(new FileInputStream(fname),
+                          errors, maxErrors);
+    }
+
+    /** Scans a stream of ACIP into tsheg bars.  If errors is
+     *  non-null, error messages will be appended to it.  You can
+     *  recover both errors and warnings (modulo offset information)
+     *  from the result, though.  Returns a list of ACIPStrings that
+     *  is the scan, or null if more than maxErrors occur. <p>FIXME:
+     *  not so efficient; copies the whole file into memory first.
+     *  @throws IOException if we cannot read the whole ACIP stream */
+    public static ArrayList scanStream(InputStream stream, StringBuffer errors,
+                                       int maxErrors)
        throws IOException
    {
        StringBuffer s = new StringBuffer();
        char ch[] = new char[8192];
        BufferedReader in
-            = new BufferedReader(new InputStreamReader(new FileInputStream(fname),
-                                                       "US-ASCII"));
+            = new BufferedReader(new InputStreamReader(stream, "US-ASCII"));

        int amt;
        while (-1 != (amt = in.read(ch))) {
            s.append(ch, 0, amt);
        }
        in.close();
-        return scan(s.toString(), errors, !strict, maxErrors);
+        return scan(s.toString(), errors, maxErrors);
    }

    /** Returns a list of {@link ACIPString ACIPStrings} corresponding
@ -99,26 +109,25 @@ public class ACIPTshegBarScanner {
     *  text, a tsheg bar (minus the tsheg or shad or whatever), a
     *  String of inter-tsheg-bar punctuation, etc.
     *
-     *  <p>This not only scans; it finds all the errors a parser would
-     *  too, like "NYA x" and "(" and ")" and "/NYA" etc.  It puts
-     *  those in as ACIPStrings with type {@link ACIPString#ERROR},
-     *  and also, if errors is non-null, appends helpful messages to
-     *  errors, each followed by a '\n'.  There is at least one case
-     *  where no ERROR ACIPString will appear but errors will be
-     *  modified.
+     *  <p>This not only scans; it finds all the errors and warnings a
+     *  parser would too, like "NYA x" and "(" and ")" and "/NYA" etc.
+     *  It puts those in as ACIPStrings with type {@link
+     *  ACIPString#ERROR} or {@link ACIPString#WARNING}, and also, if
+     *  errors is non-null, appends helpful messages to errors, each
+     *  followed by a '\n'.
     *  @param s the ACIP text
     *  @param errors if non-null, the buffer to which to append error
-     *  messages
-     *  @param lenientPeriods if and only if this is true, periods
-     *  will never cause errors, even if iffy text like "PAS... LA "
-     *  appears.
+     *  messages (DLC FIXME: kludge, just get this info by scanning
+     *  the result for ACIPString.ERROR (and maybe ACIPString.WARNING,
+     *  if you care about warnings), but then we'd have to put the
+     *  Offset info in the ACIPString)
     *  @param maxErrors if nonnegative, then scanning will stop when
     *  more than maxErrors errors occur.  In this event, null is
     *  returned.
     *  @return null if more than maxErrors errors occur, or the scan
     *  otherwise
    */
-    public static ArrayList scan(String s, StringBuffer errors, boolean lenientPeriods, int maxErrors) {
+    public static ArrayList scan(String s, StringBuffer errors, int maxErrors) {

        // the size depends on whether it's mostly Tibetan or mostly
        // Latin and a number of other factors.  This is meant to be
@ -159,9 +168,9 @@ public class ACIPTshegBarScanner {
                        al.add(new ACIPString(s.substring(startOfString, i),
                                              currentType));
                    }
+                    if (!waitingForMatchingIllegalClose) {
                        al.add(new ACIPString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
                                              ACIPString.ERROR));
-                    if (!waitingForMatchingIllegalClose) {
                        if (null != errors) {
                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found a truly unmatched close bracket, ] or }.\n");
@ -169,6 +178,8 @@ public class ACIPTshegBarScanner {
                        if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                    }
                    waitingForMatchingIllegalClose = false;
+                    al.add(new ACIPString("Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.",
+                                          ACIPString.ERROR));
                    if (null != errors)
                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                      + "Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
@ -422,9 +433,9 @@ public class ACIPTshegBarScanner {
                    // This is an error.  Sometimes [COMMENTS APPEAR
                    // WITHOUT # MARKS].  Though "... [" could cause
                    // this too.
-                    al.add(new ACIPString("Found an illegal open bracket: " + s.substring(i, i+1),
-                                          ACIPString.ERROR));
                    if (waitingForMatchingIllegalClose) {
+                        al.add(new ACIPString("Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.",
+                                              ACIPString.ERROR));
                        if (null != errors) {
                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
@ -443,6 +454,8 @@ public class ACIPTshegBarScanner {
                                inContext = inContext + "...";
                            }
                        }
+                        al.add(new ACIPString("Found an illegal open bracket (in context, this is " + inContext + ").  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?",
+                                              ACIPString.ERROR));
                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                      + "Found an illegal open bracket (in context, this is " + inContext + ").  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
                        if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@ -729,23 +742,17 @@ public class ACIPTshegBarScanner {
                    currentType = ACIPString.ERROR;
                }
                // . is used for a non-breaking tsheg, such as in
-                // {NGO.,} and {....,DAM}.  We give an error unless ,
+                // {NGO.,} and {....,DAM}.  We give a warning unless ,
                // or ., or [A-Za-z] follows '.'.
-                if (lenientPeriods
-                    || (i + 1 < sl
+                al.add(new ACIPString(s.substring(i, i+1),
+                                      ACIPString.TIBETAN_PUNCTUATION));
+                if (!(i + 1 < sl
                      && (s.charAt(i+1) == '.' || s.charAt(i+1) == ','
                          || (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n')
                          || (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z')
                          || (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) {
-                    al.add(new ACIPString(s.substring(i, i+1),
-                                          ACIPString.TIBETAN_PUNCTUATION));
-                } else {
                    al.add(new ACIPString("A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".",
-                                          ACIPString.ERROR));
-                    if (null != errors)
-                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
-                                      + "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n");
-                    if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
+                                          ACIPString.WARNING));
                }
                startOfString = i+1;
                break; // end '.' case
@ -832,16 +839,11 @@ public class ACIPTshegBarScanner {
            if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
        }
        if (!bracketTypeStack.empty()) {
-            al.add(new ACIPString("UNEXPECTED END OF INPUT",
+            al.add(new ACIPString("Unmatched open bracket found.  A " + ((ACIPString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.",
                                  ACIPString.ERROR));
            if (null != errors) {
-                if (ACIPString.COMMENT == currentType) {
                errors.append("Offset END: "
-                                  + "Unmatched open bracket found.  A comment does not terminate.\n");
-                } else {
-                    errors.append("Offset END: "
-                                  + "Unmatched open bracket found.  A correction does not terminate.\n");
-                }
+                              + "Unmatched open bracket found.  A " + ((ACIPString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.\n");
            }
            if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
        }
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@ -102,10 +102,10 @@ public class PackageTest extends TestCase {
            assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0);
            return;
        } else {
-            if (pt.getWarning(false, l, acip) != null) {
-                System.out.println(pt.getWarning(false, l, acip));
-            } else if (pt.getWarning(true, l, acip) != null)
-                if (sdebug || debug) System.out.println("Paranoiac warning is this: " + pt.getWarning(true, l, acip));
+            if (pt.getWarning("Most", l, acip) != null) {
+                System.out.println(pt.getWarning("Most", l, acip));
+            } else if (pt.getWarning("All", l, acip) != null)
+                if (sdebug || debug) System.out.println("Paranoiac warning is this: " + pt.getWarning("All", l, acip));
        }
        int np = pt.numberOfParses();
        boolean goodness = expectedParses == null || expectedParses.length == np;
@ -7049,12 +7049,8 @@ tstHelper("ZUR");
    }

    private static void shelp(String s, String expectedErrors, String expectedScan) {
-        shelp(s, expectedErrors, false, expectedScan);
-    }
-
-    private static void shelp(String s, String expectedErrors, boolean lenientPeriods, String expectedScan) {
        StringBuffer errors = new StringBuffer();
-        ArrayList al = ACIPTshegBarScanner.scan(s, errors, lenientPeriods, -1);
+        ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1);
        if (null != expectedScan) {
            if (!al.toString().equals(expectedScan)) {
                System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:");
@ -7075,18 +7071,14 @@ tstHelper("ZUR");
        }
    }

-    /** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, boolean, int)}. */
+    /** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */
    public void testScanner() {
        shelp("LA...SGRUB",
              "",
-              "[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]"); // DLC FIXME
-        shelp("PAS... LA",
-              "Offset 5 or maybe 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
-              "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
+              "[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]");
        shelp("PAS... LA",
              "",
-              true,
-              "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
+              "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, WARNING:{A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
        shelp("^GONG SA,",
              "",
              "[TIBETAN_NON_PUNCTUATION:{^GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
@ -7220,7 +7212,7 @@ tstHelper("ZUR");
    }
    private static void uhelp(String acip, String expectedUnicode) {
        StringBuffer errors = new StringBuffer();
-        String unicode = ACIPConverter.convertToUnicode(acip, errors, null, true);
+        String unicode = ACIPConverter.convertToUnicode(acip, errors, null, true, "Most");
        if (null == unicode) {
            if (null != expectedUnicode && "none" != expectedUnicode) {
                System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
--- a/source/org/thdl/tib/text/ttt/TParseTree.java
+++ b/source/org/thdl/tib/text/ttt/TParseTree.java
@ -139,7 +139,7 @@ class TParseTree {

            // We give a warning about these, optionally, so that
            // users can produce output that even a dumb ACIP reader
-            // can understand.  See getWarning(true, ..).
+            // can understand.  See getWarning("All", ..).

            // if j is in this list, then up.get(j) is still a
            // potential winner.
@ -246,16 +246,24 @@ class TParseTree {

    /** Returns null if this parse tree is perfectly legal and valid.
     *  Returns a warning for users otherwise.  If and only if
-     *  paranoid is true, then even unambiguous ACIP like PADMA, which
-     *  could be improved by being written as PAD+MA, will cause a
-     *  warning.
-     *  @param paranoid true if you do not mind a lot of warnings
+     *  warningLevel is "All", then even unambiguous ACIP like PADMA,
+     *  which could be improved by being written as PAD+MA, will cause
+     *  a warning.
+     *  @param warningLevel "All" if you're paranoid, "Most" to see
+     *  warnings about lacking vowels on final stacks, "Some" to see
+     *  warnings about lacking vowels on non-final stacks and also
+     *  warnings about when prefix rules affect you, "None" if you
+     *  like to see IllegalArgumentExceptions.
     *  @param pl the pair list from which this parse tree originated
     *  @param originalACIP the original ACIP, or null if you want
     *  this parse tree to make a best guess. */
-    public String getWarning(boolean paranoid,
+    public String getWarning(String warningLevel,
                             TPairList pl,
                             String originalACIP) {
+        if (warningLevel != "Some"
+            && warningLevel != "Most"
+            && warningLevel != "All")
+            throw new IllegalArgumentException("warning level bad: is it interned?");

        {
            TStackList bestParse = getBestParse();
@ -276,18 +284,20 @@ class TParseTree {
                } else {
                    if (getBestParse().hasStackWithoutVowel(pl, isLastStack)) {
                        if (isLastStack[0]) {
+                            if (warningLevel == "All" || warningLevel == "Most")
                                return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
                        } else {
                            return "Warning: There is a stack, before the last stack, without a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
                        }
                    }
-                    if (paranoid) {
+                    if ("All" == warningLevel) {
                        return "Warning: Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
                    }
                }
            } else {
                if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) {
                    if (isLastStack[0]) {
+                        if (warningLevel == "All" || warningLevel == "Most")
                            return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
                    } else {
                        return "Warning: There is a stack, before the last stack, without a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";