From 557ed7ed44cbb73ad0edc455008f36b32f3a51a5 Mon Sep 17 00:00:00 2001 From: dchandler Date: Sat, 18 Oct 2003 17:49:29 +0000 Subject: [PATCH] DKY'O etc. weren't being handled properly by ACIP->Tibetan. Now they are. --- source/org/thdl/tib/text/ttt/.cvsignore | 3 ++ source/org/thdl/tib/text/ttt/ACIPRules.java | 36 +++++++++++++++++++ source/org/thdl/tib/text/ttt/PackageTest.java | 14 ++++---- source/org/thdl/tib/text/ttt/TPair.java | 20 ++--------- .../thdl/tib/text/ttt/TPairListFactory.java | 31 ++++++++++++---- 5 files changed, 75 insertions(+), 29 deletions(-) create mode 100644 source/org/thdl/tib/text/ttt/.cvsignore diff --git a/source/org/thdl/tib/text/ttt/.cvsignore b/source/org/thdl/tib/text/ttt/.cvsignore new file mode 100644 index 0000000..bc150c4 --- /dev/null +++ b/source/org/thdl/tib/text/ttt/.cvsignore @@ -0,0 +1,3 @@ +*~ +*.bak +.#* diff --git a/source/org/thdl/tib/text/ttt/ACIPRules.java b/source/org/thdl/tib/text/ttt/ACIPRules.java index ad535f7..377dad4 100644 --- a/source/org/thdl/tib/text/ttt/ACIPRules.java +++ b/source/org/thdl/tib/text/ttt/ACIPRules.java @@ -557,4 +557,40 @@ public class ACIPRules { duff.add(TibetanMachineWeb.getGlyph("H")); } + + /** Returns true if and only if l is the ACIP representation of a + letter that can be a suffix. Note that all postsuffixes are + also suffixes. l must not have an "A" -- use "S", not "SA", + that is. */ + public static boolean isACIPSuffix(String l) { + return ("S".equals(l) + || "G".equals(l) + || "D".equals(l) + || "M".equals(l) + || "'".equals(l) + || "B".equals(l) + || "NG".equals(l) + || "N".equals(l) + || "L".equals(l) + || "R".equals(l)); + } + + /** Returns true if and only if l is the ACIP representation of a + letter that can be a prefix. l must not have an "A" -- use + "D", not "DA", that is. 
*/ + public static boolean isACIPPrefix(String l) { + return ("'".equals(l) + || "M".equals(l) + || "B".equals(l) + || "D".equals(l) + || "G".equals(l)); + } + + /** Returns true if and only if l is the ACIP representation of a + letter that can be a postsuffix. l must not have an "A" -- + use "D", not "DA", that is. */ + public static boolean isACIPPostsuffix(String l) { + return ("S".equals(l) + || "D".equals(l)); + } } diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index 74d20d8..1e10d75 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -408,10 +408,8 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("SNYAMS'AM'ANG", "{S}{NYA}{M}{S'A}{M'A}{NG}", null, null, "{S+NYA}{M+S'A}{M'A}{NG}", 0); tstHelper("SNYAM'AM", null, null, null, "{S+NYA}{M}{'A}{M}", 2); tstHelper("SNYAMS'AM", null, null, null, "{S+NYA}{M}{S}{'A}{M}", 2); - tstHelper("SNYAM-'A-M", null, null, null, "!null!", 1); tstHelper("SNYAM-'A-M", null, null, null, "{S+NYA}{M}{'A}{M}", -1); tstHelper("SNY-M-'-M", null, null, null, "{S+NY}{M}{'}{M}", -1); - tstHelper("SNY-M-'-M", null, null, null, "!null!", 1); tstHelper("SNYAMS'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 2); tstHelper("SNYAMS'I'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'I}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 2); tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", -1); @@ -421,7 +419,6 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("SAM'US", null, null, null, "{SA}{M}{'U}{S}", 2); tstHelper("SAM'UR'US", null, null, null, "{SA}{M}{'U}{R}{'U}{S}", 2); tstHelper("LA'OS", null, null, null, "{LA}{'O}{S}", -1); - tstHelper("LA'OS", null, null, null, "!null!", 1); tstHelper("NA'OS", null, null, null, "{NA}{'O}{S}", -1); tstHelper("NA'IS", null, null, null, "{NA}{'I}{S}", -1); tstHelper("LE'UNG", null, null, null, "{LE}{'U}{NG}", -1); @@ -456,12 +453,17 @@ tstHelper("KA'", "[(K . 
A), (' . )]", } tstHelper("GDAM-S'O", null, null, null, "{G}{DA}{M}{S}{'O}", 2); - tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C'O}", 0); - tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C}{'O}", 3); + tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C'O}", -1); tstHelper("BRLA", null, null, null, "{B}{R+LA}", -1); tstHelper("DKY", null, null, null, "{D}{K+Y}", -1); - // DLC FIXME NOW this should work, but doesn't: tstHelper("DKY'O", null, null, null, "{D}{K+Y'O}", 3); + tstHelper("DKY'O", null, null, null, "{D}{K+Y'O}", -1); tstHelper("DKYA'O", null, null, null, "{D}{K+YA}{'O}", -1); + tstHelper("GM'O", null, null, null, "{G+M'O}", -1); + tstHelper("GAM'O", null, null, null, "{GA}{M}{'O}", 2); + tstHelper("G-M'O", null, null, null, "{G}{M'O}", -1); + tstHelper("DM'O", null, null, null, "{D}{M'O}", -1); + tstHelper("DAM'O", null, null, null, "{DA}{M}{'O}", 2); + tstHelper("D-M'O", null, null, null, "{D}{M'O}", -1); tstHelper("SHR'I", "{SH}{R'I}", null, diff --git a/source/org/thdl/tib/text/ttt/TPair.java b/source/org/thdl/tib/text/ttt/TPair.java index a3f8e7f..f98807b 100644 --- a/source/org/thdl/tib/text/ttt/TPair.java +++ b/source/org/thdl/tib/text/ttt/TPair.java @@ -119,11 +119,7 @@ class TPair { && ((null == r || "".equals(r)) || "-".equals(r) || "A".equals(r)) // DLC FIXME: though check for BASKYABS and warn because BSKYABS is more common - && ("'".equals(l) - || "M".equals(l) - || "B".equals(l) - || "D".equals(l) - || "G".equals(l))); + && ACIPRules.isACIPPrefix(l)); } /** Returns true if and only if this pair could be a Tibetan @@ -133,8 +129,7 @@ class TPair { && ((null == r || "".equals(r)) || "-".equals(r) || "A".equals(r)) // DLC FIXME: though warn about GAMASA vs. 
GAMS - && ("S".equals(l) - || "D".equals(l))); + && ACIPRules.isACIPPostsuffix(l)); } /** Returns true if and only if this pair could be a Tibetan @@ -144,16 +139,7 @@ class TPair { && ((null == r || "".equals(r)) || "-".equals(r) || "A".equals(r)) - && ("S".equals(l) - || "G".equals(l) - || "D".equals(l) - || "M".equals(l) - || "'".equals(l) - || "B".equals(l) - || "NG".equals(l) - || "N".equals(l) - || "L".equals(l) - || "R".equals(l))); + && ACIPRules.isACIPSuffix(l)); } /** Returns true if and only if this pair is merely a diff --git a/source/org/thdl/tib/text/ttt/TPairListFactory.java b/source/org/thdl/tib/text/ttt/TPairListFactory.java index 8e19629..c9d6a86 100644 --- a/source/org/thdl/tib/text/ttt/TPairListFactory.java +++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java @@ -60,8 +60,8 @@ class TPairListFactory { * but you don't) */ static TPairList[] breakACIPIntoChunks(String acip) throws IllegalArgumentException { try { - TPairList a = breakHelper(acip, true); - TPairList b = breakHelper(acip, false); + TPairList a = breakHelper(acip, true, false); + TPairList b = breakHelper(acip, false, false); if (a.equals(b)) return new TPairList[] { a, null }; else @@ -72,8 +72,15 @@ class TPairListFactory { throw new IllegalArgumentException("Input too large[2]: " + acip); } } - /** Helps {@link breakACIPIntoChunks(String)}. */ - private static TPairList breakHelper(String acip, boolean tickIsVowel) { + /** Helps {@link #breakACIPIntoChunks(String)}. * @param tickIsVowel true if and only if you want to treat the + * ACIP {'} as a U+0F71 vowel instead of the full-sized + * consonant in special, "this might be an appendage like 'AM or + * 'ANG" circumstances + * @param weHaveSeenVowelAlready true if and only if, in our + * recursion, we've already found one vowel (not a disambiguator, + * but a vowel like "A", "E", "Um:", "'U", etc.) 
*/ + private static TPairList breakHelper(String acip, boolean tickIsVowel, boolean weHaveSeenVowelAlready) { // base case for our recursion: if ("".equals(acip)) @@ -86,6 +93,8 @@ class TPairListFactory { if (!tickIsVowel && null != head.getLeft() && null != head.getRight() + && weHaveSeenVowelAlready + && ACIPRules.isACIPSuffix(head.getLeft()) // DKY'O should be two horizontal units, not three. -- {D}{KY'O}, not {D}{KY}{'O}. && head.getRight().startsWith("'")) { head = new TPair(head.getLeft(), // Without this disambiguator, we are @@ -97,7 +106,12 @@ class TPairListFactory { TPairList tail; if ((tail - = breakHelper(acipBuf.substring(howMuch), tickIsVowel)).hasSimpleError()) { + = breakHelper(acipBuf.substring(howMuch), + tickIsVowel, + weHaveSeenVowelAlready + || (head.getRight() != null + && !"+".equals(head.getRight()) + && !"-".equals(head.getRight())))).hasSimpleError()) { for (int i = 1; i < howMuch; i++) { // try giving i characters back if that leaves us with // a legal head and makes the rest free of simple @@ -106,7 +120,12 @@ class TPairListFactory { TPair newHead; if ((newHead = head.minusNRightmostACIPCharacters(i)).isLegal() && !(newTail - = breakHelper(acipBuf.substring(howMuch - i), tickIsVowel)).hasSimpleError()) { + = breakHelper(acipBuf.substring(howMuch - i), + tickIsVowel, + weHaveSeenVowelAlready + || (newHead.getRight() != null + && !"+".equals(newHead.getRight()) + && !"-".equals(newHead.getRight())))).hasSimpleError()) { newTail.prepend(newHead); return newTail; }