diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index ccc9c95..143c0cf 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -1,6 +1,4 @@ -// DLC NOW: 'US etc. -- do we handle them all? -// DLC NOW WARN ON NNYA and DBA -// DLC NOW: implement Robert Chilton-supplied prefix rules +// DLC NOW WARN ON DBA /* The contents of this file are subject to the THDL Open Community License Version 1.0 (the "License"); you may not use this file except in compliance @@ -45,10 +43,6 @@ public class ACIPConverter { // DLC NOW: BAo isn't converting. - // DLC NOW: tRAStA is not converter correctly to Unicode, and no - // warning is given when converting to TMW (Wait! isn't the "a - // stack occurs w/o a vowel" warning given?) - /** Command-line converter. Gives error messages on standard * output about why we can't convert the document perfectly and * exits with non-zero return code, or is silent otherwise and diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index e5a7e8d..2632f44 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -7364,12 +7364,14 @@ tstHelper("ZUR"); } public void testACIPConversion() { - uhelp("DZHDZHA", "\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is - uhelp("DZHDZA", "\u0f5c\u0fab"); + uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a"); + uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is + uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we 
parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fab"); uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1"); - uhelp("PSNYA", "\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But, DLC, warn! - uhelp("NNYA", "\u0f53\u0f99"); // DLC warn - uhelp("GHNYA", "\u0f43\u0f99"); + uhelp("P+S+NYA", "\u0f54\u0fb6\u0f99"); + uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn! + uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f53\u0f99"); + uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f43\u0f99"); // TS+NYA and T+S+N+YA are both legal, so what is TSNYA? // Private correspondence with Robert Chilton says that it is diff --git a/source/org/thdl/tib/text/ttt/TParseTree.java b/source/org/thdl/tib/text/ttt/TParseTree.java index 36c190b..9041910 100644 --- a/source/org/thdl/tib/text/ttt/TParseTree.java +++ b/source/org/thdl/tib/text/ttt/TParseTree.java @@ -322,6 +322,58 @@ class TParseTree { } } } + + // Check for things like DZHDZ+H: stacks that have some pluses + // but not all pluses. + // + // Check for things like TSNYA: stacks that could be + // mistransliterations of T+S+N+YA + // + // Check for useless disambiguators. 
+        {
+            // Walk pl in step with the stacks of bestParse (plnum indexes pl)
+            // and return the first warning that applies.
+            int plnum = 0;
+            String acipForWarnings = (null != originalACIP) ? originalACIP : recoverACIP(); // resolved once for all messages
+            String swarn = "There is a stack of three or more consonants in " + acipForWarnings + " that uses at least one '+' but does not use a '+' between each consonant.";
+            String disamWarn = "There is a useless disambiguator in " + acipForWarnings + ".";
+            // A disambiguator at this position is reported at once as useless.
+            if (plnum < pl.size() && pl.get(plnum).isDisambiguator())
+                return disamWarn;
+            for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) {
+                TPairList stack = bestParse.get(stackNum);
+                // 0: undecided; -1: non-final pairs have a null right; 1: non-null.
+                int type = 0;
+                int stackSize = stack.size();
+                boolean hasAmbiguousConsonant = false; // TS could be TSA or T+SA, so it's "ambiguous"
+                for (int j = 0; j < stackSize; j++) {
+                    TPair tp = pl.get(plnum++);
+                    if (j + 1 < stackSize) {
+                        if (null == tp.getRight()) {
+                            if (type == 0)
+                                type = -1;
+                            else if (type == 1)
+                                return swarn;
+                        } else {
+                            if (type == 0)
+                                type = 1;
+                            else if (type == -1)
+                                return swarn;
+                        }
+                    }
+                    if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
+                        hasAmbiguousConsonant = true;
+                    }
+                }
+                // Compare by value: warningLevel may not be the literal's own String instance.
+                if (hasAmbiguousConsonant && -1 == type
+                    && ("Most".equals(warningLevel) || "All".equals(warningLevel)))
+                    return "There is a chance that the ACIP " + acipForWarnings + " was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.";
+                // Same disambiguator check at the position following this stack.
+                if (plnum < pl.size() && pl.get(plnum).isDisambiguator())
+                    return disamWarn;
+            }
+        }
         return null;
     }