Added three new kinds of warnings to ACIP->Tibetan conversions.

2003-10-21 02:00:49 +00:00 · 2003-10-21 02:00:49 +00:00 · 2f81a801ef
commit 2f81a801ef
parent a47af2c165
3 changed files with 60 additions and 12 deletions
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@ -1,6 +1,4 @@
-// DLC NOW: 'US etc. -- do we handle them all?
+// DLC NOW WARN ON DBA
 // DLC NOW WARN ON NNYA and DBA
 // DLC NOW: implement Robert Chilton-supplied prefix rules
 /*
 The contents of this file are subject to the THDL Open Community License
 Version 1.0 (the "License"); you may not use this file except in compliance
@ -45,10 +43,6 @@ public class ACIPConverter {
    // DLC NOW: BAo isn't converting.
    // DLC NOW: tRAStA is not converted correctly to Unicode, and no
    // warning is given when converting to TMW (Wait!  isn't the "a
    // stack occurs w/o a vowel" warning given?)
    /** Command-line converter.  Gives error messages on standard
     *  output about why we can't convert the document perfectly and
     *  exits with non-zero return code, or is silent otherwise and
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@ -7364,12 +7364,14 @@ tstHelper("ZUR");
    }
    public void testACIPConversion() {
-        uhelp("DZHDZHA", "\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is
+        uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
-        uhelp("DZHDZA", "\u0f5c\u0fab");
+        uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is
        uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fab");
        uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
-        uhelp("PSNYA", "\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA?  No, it's P+S+NYA.  But, DLC, warn!
+        uhelp("P+S+NYA", "\u0f54\u0fb6\u0f99");
-        uhelp("NNYA", "\u0f53\u0f99"); // DLC warn
+        uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA?  No, it's P+S+NYA.  But warn!
-        uhelp("GHNYA", "\u0f43\u0f99");
+        uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f53\u0f99");
        uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f43\u0f99");
        // TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
        // Private correspondence with Robert Chilton says that it is
--- a/source/org/thdl/tib/text/ttt/TParseTree.java
+++ b/source/org/thdl/tib/text/ttt/TParseTree.java
@ -322,6 +322,58 @@ class TParseTree {
                }
            }
        }
        // Check for things like DZHDZ+H: stacks that have some pluses
        // but not all pluses.
        //
        // Check for things like TSNYA: stacks that could be
        // mistransliterations of T+S+N+YA
        //
        // Check for useless disambiguators.
        {
            int plnum = 0;
            String swarn
                = "There is a stack of three or more consonants in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " that uses at least one '+' but does not use a '+' between each consonant.";
            String disamWarn
                = "There is a useless disambiguator in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + ".";
            while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
                ++plnum;
                return disamWarn;
            }
            for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) {
                TPairList stack = bestParse.get(stackNum);
                int type = 0;
                int stackSize = stack.size();
                boolean hasAmbiguousConsonant = false; // TS could be TSA or T+SA, so it's "ambiguous"
                for (int j = 0; j < stackSize; j++) {
                    TPair tp = pl.get(plnum++);
                    if (j + 1 < stack.size()) {
                        if (null == tp.getRight()) {
                            if (type == 0)
                                type = -1;
                            else if (type == 1)
                                return swarn;
                        } else {
                            if (type == 0)
                                type = 1;
                            else if (type == -1)
                                return swarn;
                        }
                    }
                    if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
                        hasAmbiguousConsonant = true;
                    }
                }
                if (hasAmbiguousConsonant && -1 == type) {
                    if ("Most" == warningLevel || "All" == warningLevel)
                        return "There is a chance that the ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.";
                }
                while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
                    ++plnum;
                    return disamWarn;
                }
            }
        }
        return null;
    }