From c1aa81e94337295e428f6ead8875fce5d1b686fa Mon Sep 17 00:00:00 2001 From: dchandler Date: Tue, 16 Dec 2003 07:45:40 +0000 Subject: [PATCH] RFE 860190: ACIP->Unicode now gives a warning when it outputs something that can't be represented in TMW. --- .../org/thdl/tib/text/ttt/ACIPConverter.java | 35 +++++++++++++++---- source/org/thdl/tib/text/ttt/PackageTest.java | 24 +++++++------ source/org/thdl/tib/text/ttt/TPairList.java | 2 +- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index e645141..1a27778 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -436,10 +436,7 @@ public class ACIPConverter { // converter's life, parsing of // tsheg bars was handled // differently, but now, I think - // this is impossible. DLC FIXME: - // run with -Dthdl.debug=true on - // all ACIP Release V texts you - // can find. + // this is impossible. ThdlDebug.noteIffyCode(); hasErrors = true; String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has no legal parses.]"; @@ -501,6 +498,28 @@ public class ACIPConverter { if (null != writer) { unicode = sl.getUnicode(); if (null == unicode) throw new Error("FIXME: make this an assertion 4"); + // Warn if any of the stacks + // in this tsheg bar do not + // have corresponding glyphs + // in TMW. That means there + // was probably a typo in the + // input. + if ("None" != warningLevel) { + Object[] trialDuff = sl.getDuff(); + for (int ii = 0; ii < trialDuff.length; ii++) { + if (trialDuff[ii] instanceof String) { + String bwarning + = "[#WARNING CONVERTING ACIP DOCUMENT: " + + (String)trialDuff[ii] + "]"; + unicode = bwarning + unicode; + if (null != hasWarnings) hasWarnings[0] = true; + if (null != warnings) { + warnings.append(bwarning); + warnings.append('\n'); + } + } + } + } } if (null != tdoc) { duff = sl.getDuff(); @@ -673,12 +692,14 @@ public class ACIPConverter { color); else { hasErrors = true; + String emsg + = "[ERROR: " + (String)duff[j] + "]"; if (null != errors) - errors.append((String)duff[j] + "\n"); + errors.append(emsg + "\n"); tdoc.appendRoman(tdocLocation[0], - (String)duff[j], + emsg, Color.RED); - tdocLocation[0] += ((String)duff[j]).length(); + tdocLocation[0] += emsg.length(); } } } else { diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index decc024..a954bb4 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -7448,13 +7448,13 @@ M+NA uhelp("N+YA", "\u0f53\u0fb1"); uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") NE+YA has these errors: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]"); - uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a"); + uhelp("tRAStA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {t+RA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f4a\u0fb2\u0f66\u0f9a"); uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab\u0fb7"); // tricky because DZHDZA is not in TMW but DZHDZHA is - uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab"); + uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {DZH+DZA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f5b\u0fb7\u0fab"); uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1"); - uhelp("P+S+NYA", "\u0f54\u0fb6\u0f99"); - uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn! - uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f53\u0f99"); + uhelp("P+S+NYA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99"); + uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn! + uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {N+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f53\u0f99"); uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f42\u0fb7\u0f99"); // TS+NYA and T+S+N+YA are both legal, so what is TSNYA? @@ -7525,7 +7525,7 @@ M+NA uhelp("*#HUm: G+DHOO GRO`;.,", "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa1\u0fb7\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); uhelp("*#HUm: K+DHA GRO`;.,", - "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f40\u0fa1\u0fb7\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); + "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {K+DHA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f40\u0fa1\u0fb7\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); uhelp("HA,\nHA\n\nHA", "\u0f67\u0f0d \u0f67\u0f0b\n\n\u0f67"); uhelp("NGA,", "\u0f44\u0f0c\u0f0d"); uhelp("NGA,\nHA\n\nHA", "\u0f44\u0f0c\u0f0d \u0f67\u0f0b\n\n\u0f67"); @@ -7538,6 +7538,8 @@ M+NA uhelp("GU, ,KHO", "\u0f42\u0f74\u0f0d \u0f0d\u0f41\u0f7c"); uhelp("GU ,KHO", "\u0f42\u0f74\u0f0b \u0f0d\u0f41\u0f7c"); // FIXME: missing a shad after GU, warn about that. uhelp("GA HA", "\u0f42\u0f0b \u0f67"); + uhelp("BCWA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba"); + uhelp("'KYO", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {'+K+YO}, but only because our knowledge of prefix rules says that {'}{K+YO} is not a legal Tibetan tsheg bar (\"syllable\")][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {'+K+YO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f60\u0f90\u0fb1\u0f7c"); uhelp("WA", "\u0f5d"); uhelp("W", "\u0f5d"); uhelp("WO", "\u0f5d\u0f7c"); @@ -7555,21 +7557,21 @@ M+NA uhelp("WRA", "\u0f5d\u0fb2"); uhelp("W+RA", "\u0f5d\u0fb2"); uhelp("W+R", "\u0f5d\u0fb2"); - uhelp("BCWA", "\u0f56\u0f95\u0fba"); - uhelp("BCW", "\u0f56\u0f95\u0fba"); - uhelp("BCWO", "\u0f56\u0f95\u0fba\u0f7c"); + uhelp("BCWA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba"); + uhelp("BCW", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+W} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba"); + uhelp("BCWO", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba\u0f7c"); uhelp("BCVA", "\u0f56\u0f45\u0fad"); uhelp("BCV", "\u0f56\u0f45\u0fad"); uhelp("BCV'O", "\u0f56\u0f45\u0fad\u0f71\u0f7c"); uhelp("BCV'A", "\u0f56\u0f45\u0fad\u0f71"); - uhelp("BCV'", "\u0f56\u0f95\u0fad\u0fb0"); + uhelp("BCV'", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+V+'} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fad\u0fb0"); uhelp("GYA", "\u0f42\u0fb1"); uhelp("GY", "\u0f42\u0fb1"); uhelp("G-YA", "\u0f42\u0f61"); uhelp("GA-YA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YA.]\u0f42\u0f61"); uhelp("GA-YO", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YO.]\u0f42\u0f61\u0F7c"); uhelp("RTZVA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZVA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fad"); - uhelp("RTZWA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZWA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fba"); + uhelp("RTZWA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZWA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {R+TZ+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0fa9\u0fba"); } public void testFixedFormSubjoinedConsonants() { // Usual subjoined RA: diff --git a/source/org/thdl/tib/text/ttt/TPairList.java b/source/org/thdl/tib/text/ttt/TPairList.java index 3bd1c77..2625f9d 100644 --- a/source/org/thdl/tib/text/ttt/TPairList.java +++ b/source/org/thdl/tib/text/ttt/TPairList.java @@ -694,7 +694,7 @@ class TPairList { if (!TibetanMachineWeb.isKnownHashKey(hashKey)) { hashKey = hashKey.replace('+', '-'); if (!TibetanMachineWeb.isKnownHashKey(hashKey)) { - duffsAndErrors.add("[#ERROR The ACIP {" + recoverACIP() + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]"); + duffsAndErrors.add("The ACIP {" + recoverACIP() + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts."); return; } }