Revamped some ACIP->Tibetan error messages.

2003-12-06 20:19:40 +00:00 · 2003-12-06 20:19:40 +00:00 · c43e9a446b
commit c43e9a446b
parent c9c771d1ee
3 changed files with 32 additions and 20 deletions
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@@ -321,7 +321,7 @@ public class ACIPConverter {
                }
            } else if (stype == TString.TSHEG_BAR_ADORNMENT) {
                if (lastGuyWasNonPunct) {
-                    String err = "[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot yet convert " + s.getText() + " because the converter's author is unclear what the result should be.]";
+                    String err = "[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot convert the ACIP {" + s.getText() + "} to Tibetan because it is unclear what the result should be.]";
                    if (null != writer) {
                        String uni = ACIPRules.getUnicodeFor(s.getText(), false);
                        if (null == uni) {
@@ -388,7 +388,7 @@ public class ACIPConverter {
                        if ((acipError = pls[0].getACIPError()) != null
                            && (null == pls[1] || pls[1].getACIPError() != null)) {
                            hasErrors = true;
-                            String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS THESE ERRORS: " + acipError + "]";
+                            String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has these errors: " + acipError + "]";
                            if (null != writer) writer.write(errorMessage);
                            if (null != tdoc) {
                                tdoc.appendRoman(tdocLocation[0], errorMessage,
@@ -403,7 +403,7 @@ public class ACIPConverter {
                                              ? null : pls[1].getParseTree());
                            if (null == pt0 && null == pt1) {
                                hasErrors = true;
-                                String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " IS ESSENTIALLY NOTHING.]";
+                                String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " is essentially nothing.]";
                                if (null != writer) writer.write(errorMessage);
                                if (null != tdoc) {
                                    tdoc.appendRoman(tdocLocation[0], errorMessage,
@@ -417,8 +417,18 @@ public class ACIPConverter {
                                TStackList sl1 = ((null == pt1)
                                                  ? null : pt1.getBestParse());
                                if (null == sl0 && null == sl1) {
+                                    // I don't think this can happen
+                                    // nowadays; early in the
+                                    // converter's life, parsing of
+                                    // tsheg bars was handled
+                                    // differently, but now, I think
+                                    // this is impossible.  DLC FIXME:
+                                    // run with -Dthdl.debug=true on
+                                    // all ACIP Release IV texts you
+                                    // can find.
+                                    ThdlDebug.noteIffyCode();
                                    hasErrors = true;
-                                    String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS NO LEGAL PARSES.]";
+                                    String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has no legal parses.]";
                                    if (null != writer) writer.write(errorMessage);
                                    if (null != tdoc) {
                                        tdoc.appendRoman(tdocLocation[0],
--- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
@@ -872,7 +872,7 @@ public class ACIPTshegBarScanner {
                        }
                    }
                    if ('%' == ch) {
-                        al.add(new TString("The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice",
+                        al.add(new TString("The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice.  To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.",
                                           TString.WARNING));
                    }
                }
@@ -924,11 +924,13 @@ public class ACIPTshegBarScanner {
                            startOfString = i+1;
                            break;
                        } else {
-                            al.add(new TString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.",
+                            final String msg
+                                = "Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama.  In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error.  If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}.  If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.";
+                            al.add(new TString(msg,
                                               TString.ERROR));
                            if (null != errors)
                                errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
-                                              + "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n");
+                                              + msg + "\n");
                        }
                    } else {
                        al.add(new TString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@@ -7298,15 +7298,15 @@ tstHelper("ZUR");
            uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
        }
        shelp("K\\,",
-              "Offset 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
-              "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}, TIBETAN_PUNCTUATION:{,}]");
+              "Offset 1: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama.  In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error.  If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}.  If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n",
+              "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama.  In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error.  If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}.  If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]");


-        shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice}]");
+        shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice.  To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
        shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]");
        shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]");

-        shelp("MTHAR\n%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP % must be glued to the end of a tsheg bar, but this one was not}, WARNING:{The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice}]");
+        shelp("MTHAR\n%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP % must be glued to the end of a tsheg bar, but this one was not}, WARNING:{The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice.  To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
        shelp("MTHAR x", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP x must be glued to the end of a tsheg bar, but this one was not}]");

        shelp("PHYIR;", "", "[TIBETAN_NON_PUNCTUATION:{PHYIR}, TIBETAN_PUNCTUATION:{;}]");
@@ -7372,8 +7372,8 @@ tstHelper("ZUR");
              "Offset 13: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\nOffset 38: Found an illegal character, y, with ordinal 121.\nOffset 40: Found an illegal character, u, with ordinal 117.\nOffset 42: Found an illegal character, k, with ordinal 107.\nOffset 45: Found an illegal character, w, with ordinal 119.\nOffset 47: Found a truly unmatched close bracket, ] or }.\nOffset 47: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n",
              "[ERROR:{Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n}, COMMENT:{[# This is a [# nested comment]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{d}, TSHEG_BAR_ADORNMENT:{o}, TIBETAN_NON_PUNCTUATION:{n't}, TIBETAN_PUNCTUATION:{ }, ERROR:{Found an illegal character, y, with ordinal 121.}, ERROR:{The ACIP o must be glued to the end of a tsheg bar, but this one was not}, ERROR:{Found an illegal character, u, with ordinal 117.}, TIBETAN_PUNCTUATION:{ }, ERROR:{Found an illegal character, k, with ordinal 107.}, TIBETAN_NON_PUNCTUATION:{n}, TSHEG_BAR_ADORNMENT:{o}, ERROR:{Found an illegal character, w, with ordinal 119.}, QUESTION:{?}, ERROR:{Found a truly unmatched close bracket, ]}, ERROR:{Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.}, TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}]");
        shelp("//NYA\\\\",
-              "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\nOffset 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
-              "[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}]");
+              "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama.  In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error.  If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}.  If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\nOffset 6: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama.  In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error.  If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}.  If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n",
+              "[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama.  In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error.  If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}.  If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama.  In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error.  If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}.  If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}]");

    }
    private static void uhelp(String acip) {
@@ -7441,13 +7441,13 @@ M+NA
        uhelp("KO&HAm,", "\u0F40\u0F7C\u0F85\u0F67\u0F7E\u0F0D");
        uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]");
        uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]");
-        uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]");
-        uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]");
-        uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]");
+        uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice.  To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]");
+        uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") : has these errors: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]");
+        uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") m has these errors: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]");

        uhelp("N+YA", "\u0f53\u0fb1");
        uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
-        uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") NE+YA HAS THESE ERRORS: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
+        uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") NE+YA has these errors: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
        uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
        uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is
        uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fab");
@@ -7463,14 +7463,14 @@ M+NA

        uhelp("THAG PA", "\u0f50\u0f42\u0f0b\u0f54");
        uhelp("KA \nKHA\n\nGA", "\u0f40\u0f0b\u0f41\u0f0b\n\n\u0f42");
-        uhelp("KA%\nKHA", "\u0f40\u0f35[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]\u0f0b\u0f41");
-        uhelp("KA%", "\u0f40\u0f35[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]");
+        uhelp("KA%\nKHA", "\u0f40\u0f35[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice.  To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]\u0f0b\u0f41");
+        uhelp("KA%", "\u0f40\u0f35[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice.  To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]");
        uhelp("KAo", "\u0f40\u0f37");
        uhelp("KAo\n\nKA", "\u0f40\u0f37\u0f0b\n\n\u0f40");
        uhelp("KAo\nKHA", "\u0f40\u0f37\u0f0b\u0f41");
        uhelp("KAo KHA", "\u0f40\u0f37\u0f0b\u0f41");
        uhelp("KA KAo KHA", "\u0f40\u0f0b\u0f40\u0f37\u0f0b\u0f41");
-        uhelp("KAx", "\u0f40[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot yet convert x because the converter's author is unclear what the result should be.]");
+        uhelp("KAx", "\u0f40[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot convert the ACIP {x} to Tibetan because it is unclear what the result should be.]");
        uhelp("G+DHA", "\u0f42\u0fa2");
        uhelp("P'EE", "\u0f54\u0f71\u0f7b");