From ac412c994b0ddda860c9849169462feef23ee8da Mon Sep 17 00:00:00 2001 From: dchandler Date: Sun, 30 Nov 2003 02:06:48 +0000 Subject: [PATCH] Now {Pm} is treated like {PAm}; {Pm:} is like {PAm:}; {P:} is like {PA:}. --- .../org/thdl/tib/text/ttt/ACIPConverter.java | 2 +- source/org/thdl/tib/text/ttt/ACIPRules.java | 13 ++++++-- source/org/thdl/tib/text/ttt/PackageTest.java | 33 ++++++++++++++----- source/org/thdl/tib/text/ttt/TPair.java | 8 +++-- .../thdl/tib/text/ttt/TPairListFactory.java | 19 +++++++++-- 5 files changed, 56 insertions(+), 19 deletions(-) diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index 5502841..9ebec93 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -328,7 +328,7 @@ public class ACIPConverter { hasErrors = true; uni = err; } - if (null != writer) writer.write(uni); + writer.write(uni); } if (null != tdoc) { String wylie diff --git a/source/org/thdl/tib/text/ttt/ACIPRules.java b/source/org/thdl/tib/text/ttt/ACIPRules.java index 126775a..b97093a 100644 --- a/source/org/thdl/tib/text/ttt/ACIPRules.java +++ b/source/org/thdl/tib/text/ttt/ACIPRules.java @@ -83,6 +83,9 @@ public class ACIPRules { // Keep this code in sync with getWylieForACIPVowel. } + // {Pm} is treated just like {PAm}; {P:} is treated just + // like {PA:}; {Pm:} is treated just like {PAm:}. But + // that happens thanks to TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]). } return (acipVowels.contains(s)); } @@ -276,6 +279,10 @@ public class ACIPRules { putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH"); putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH"); } + // {Pm} is treated just like {PAm}; {P:} is treated just + // like {PA:}; {Pm:} is treated just like {PAm:}. But + // that happens thanks to + // TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]). 
} return (String)acipVowel2wylie.get(acip); } @@ -475,12 +482,13 @@ public class ACIPRules { superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F"); // :m does not appear, though you'd think it's as valid as m:. - // I doubt these will occur alone: superACIP2unicode.put("m", "\u0F7E"); superACIP2unicode.put(":", "\u0F7F"); + superACIP2unicode.put("m:", "\u0F7E\u0F7F"); superACIP2unicode.put("Am", "\u0F7E"); superACIP2unicode.put("A:", "\u0F7F"); + superACIP2unicode.put("Am:", "\u0F7E\u0F7F"); superACIP2unicode.put("0", "\u0F20"); superACIP2unicode.put("1", "\u0F21"); @@ -567,12 +575,11 @@ public class ACIPRules { if (vowel.indexOf('m') >= 0) { DuffCode last = (DuffCode)duff.get(duff.size() - 1); - duff.remove(duff.size() - 1); + duff.remove(duff.size() - 1); // getBindu will add it back... TibTextUtils.getBindu(duff, last); } if (vowel.indexOf(':') >= 0) duff.add(TibetanMachineWeb.getGlyph("H")); - } /** Returns true if and only if l is the ACIP representation of a diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index 37db9e8..e9d8606 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -656,13 +656,13 @@ tstHelper("KA'", "[(K . A), (' . )]", "{S+P+YO}{M+S}", "{S+P+YO}{M}{S}", }); - tstHelper(":'AO", "[(: . -), (' . ), (A . O)]"); - tstHelper("m'AO", "[(m . -), (' . ), (A . O)]"); - tstHelper("m:'AO", "[(m . -), (: . -), (' . ), (A . O)]"); + tstHelper(":'AO", "[( . A:), (' . ), (A . O)]"); + tstHelper("m'AO", "[( . Am), (' . ), (A . O)]"); + tstHelper("m:'AO", "[( . Am:), (' . ), (A . O)]"); tstHelper("AA:", "[(A . A:)]", new String[] { "{AA:}" }); tstHelper("KE:", "[(K . E:)]"); - tstHelper("K:", "[(K . ), (: . )]", - new String[] { /* No parses exist. "K:" is illegal. */ }); + tstHelper("K:", "[(K . A:)]", + new String[] { "{KA:}" }); tstHelper("'AO", "[(' . ), (A . O)]"); tstHelper("'AOM", "[(' . ), (A . O), (M . 
)]"); @@ -717,8 +717,8 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("TAA", "[(T . ), (A . A)]"); tstHelper("DAA", "[(D . ), (A . A)]"); - tstHelper("DAAm", "[(D . ), (A . Am)]"); - tstHelper("DAAm:", "[(D . ), (A . Am:)]"); + tstHelper("DAAm", "[(D . A), (A . Am)]"); + tstHelper("DAAm:", "[(D . A), (A . Am:)]"); tstHelper("DA'im:", "[(D . A), (' . im:)]"); tstHelper("NA+YA", "[(N . +), (Y . A)]"); @@ -7196,6 +7196,8 @@ tstHelper("ZUR"); /** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */ public void testScanner() { + shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]"); + shelp("KA (KHA\nGA)", "", "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, START_PAREN:{(}, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, END_PAREN:{)}]"); shelp("LA...SGRUB", @@ -7416,6 +7418,7 @@ G+NA MNA' M+NA */ + uhelp("B+NA", "\u0f56\u0fa3"); uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3"); uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66"); uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66"); @@ -7438,9 +7441,10 @@ M+NA uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]"); uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]"); uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]"); - uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert 
ACIP : because : is not an ACIP consonant]"); - uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP m because m is not an ACIP consonant]"); + uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]"); + uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]"); + uhelp("N+YA", "\u0f53\u0fb1"); uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") NE+YA HAS THESE ERRORS: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]"); uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a"); @@ -7503,6 +7507,17 @@ M+NA uhelp("WWA", "\u0f5d\u0fba"); uhelp("W+WA", "\u0f5d\u0fba"); + tstHelper("Km:", "{KAm:}", + new String[] { "{KAm:}" }, + new String[] { }, + "{KAm:}"); + uhelp("Km:", "\u0f40\u0f7e\u0f7f"); + uhelp("KAm:", "\u0f40\u0f7e\u0f7f"); + uhelp("Km", "\u0f40\u0f7e"); + uhelp("KAm", "\u0f40\u0f7e"); + uhelp("K:", "\u0f40\u0f7f"); + uhelp("KA:", "\u0f40\u0f7f"); + uhelp("/NY'EE/", "\u0f3C\u0f49\u0F71\u0F7B\u0f3D"); uhelp("*#HUm: G+DHOO GRO`;.,", "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); diff --git a/source/org/thdl/tib/text/ttt/TPair.java b/source/org/thdl/tib/text/ttt/TPair.java index 1534f76..4a50fa0 100644 --- a/source/org/thdl/tib/text/ttt/TPair.java +++ b/source/org/thdl/tib/text/ttt/TPair.java @@ -213,12 +213,14 @@ class TPair { void getUnicode(StringBuffer sb, boolean subscribed) { if (null != getLeft()) { String x = ACIPRules.getUnicodeFor(getLeft(), subscribed); - if (null != x) sb.append(x); + if (null == x) throw new Error("TPair: " + getLeft() + " has no Uni"); + sb.append(x); } if 
(null != getRight() - && !("-".equals(getRight()) || "A".equals(getRight()))) { + && !("-".equals(getRight()) || "+".equals(getRight()) || "A".equals(getRight()))) { String x = ACIPRules.getUnicodeFor(getRight(), subscribed); - if (null != x) sb.append(x); + if (null == x) throw new Error("TPair: " + getRight() + " has no Uni"); + sb.append(x); } } diff --git a/source/org/thdl/tib/text/ttt/TPairListFactory.java b/source/org/thdl/tib/text/ttt/TPairListFactory.java index 648762d..e8daad5 100644 --- a/source/org/thdl/tib/text/ttt/TPairListFactory.java +++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java @@ -95,7 +95,7 @@ class TPairListFactory { * 'ANG" circumstances * @param weHaveSeenVowelAlready true if and only if, in our * recursion, we've already found one vowel (not a disambiguator, - * but a vowel like "A", "E", "Um:", "'U", etc.) */ + * but a vowel like "A", "E", "Um:", "m", "'U", etc.) */ private static TPairList breakHelper(String acip, boolean tickIsVowel, boolean weHaveSeenVowelAlready) { // base case for our recursion: @@ -212,7 +212,11 @@ class TPairListFactory { } for (i = Math.min(ACIPRules.MAX_VOWEL_LENGTH, xl - ll); i >= 1; i--) { String t = null; - if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))) { + if (ACIPRules.isVowel(t = acip.substring(ll, ll + i)) + // Or these, which we massage into "Am", "Am:", and + // "A:" because I didn't think {Pm} should be treated + // like {PAm} originally: + || "m".equals(t) || "m:".equals(t) || ":".equals(t)) { r = t; break; } @@ -227,6 +231,14 @@ class TPairListFactory { return new TPair(l, "+"); } + // Allow Pm to mean PAm, P: to mean PA:, Pm: to mean PAm:. + int mod = 0; + if ("m".equals(r)) { r = "Am"; mod = -1; } + if (":".equals(r)) { r = "A:"; mod = -1; } + if ("m:".equals(r)) { r = "Am:"; mod = -1; } + if (":m".equals(r)) { r = "A:m"; mod = -1; } // not seen, though... + + // what if we see a character that's not part of any vowel or // consonant? We return it. 
if (null == l && null == r) { @@ -236,7 +248,8 @@ class TPairListFactory { } howMuch[0] = (((l == null) ? 0 : l.length()) - + ((r == null) ? 0 : r.length())); + + ((r == null) ? 0 : r.length()) + + mod); return new TPair(l, r); } }