From 56a02ba41d0d03f7100492f79533e758cffcc216 Mon Sep 17 00:00:00 2001 From: dchandler Date: Sat, 10 Apr 2004 18:26:57 +0000 Subject: [PATCH] Fixed the worst TMW->ACIP bug, the one regarding U+0F04 and U+0F05. TMW->EWTS requires no context information, but TMW->ACIP does. --- source/org/thdl/tib/text/TibTextUtils.java | 22 ++++++- .../org/thdl/tib/text/TibetanMachineWeb.java | 57 ++++++++++++++++++- 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index 6d39d38..ac023fd 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -1638,7 +1638,27 @@ public class TibTextUtils implements THDLWylieConstants { translitBuffer.append(ch); } else { String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch); - String acip = EWTSNotACIP ? null : TibetanMachineWeb.getACIPForGlyph(dcs[i], noSuch); + String acip = null; + if (!EWTSNotACIP) { + // U+0F04 and U+0F05 -- these require lookahead to + // see if the ACIP is # (two swishes) or * (one + // swish) + + int howManyConsumed[] = new int[] { -1 /* invalid */ }; + + acip = TibetanMachineWeb.getACIPForGlyph(dcs[i], + ((i+1This would be more straightforward if it were not the case that + a TMW->ACIP conversion requires context information in the case + of U+0F04 and U+0F05. Because it does, two DuffCodes, not one, + must be passed in whenever possible. + +

We opt to treat a lone U+0F05 as an error in TMW->ACIP + conversions rather than return the pseudo-ACIP Unicode character + escape for U+0F05. After all, the conversion is TMW->ACIP, not + TMW->pseudo-ACIP. + + @return error message or valid ACIP, never pseudo-ACIP like + Unicode character escapes + @param dc the leftmost DuffCode if optionalNextDC is nonnull, or + the sole DuffCode + @param optionalNextDC null if dc is the last (rightmost) DuffCode + in the sequence, or the DuffCode following dc. If you pass in dc + equal to the DuffCode for U+0F04, and optionalNextDC null, then + "*" will be returned, so don't leave this out unless dc is the + rightmost DuffCode. + @param noSuchACIP an array whose first element will be set to true + if and only if an error message is returned instead of valid ACIP; + the first element is never set to false, so nominally caller will + initialize the first element to false + @param howManyGlyphsUsed an array whose first element will be set + to 2 if valid ACIP that describes both dc and optionalNextDC is + returned, or 1 otherwise */ +public static String getACIPForGlyph(DuffCode dc, + DuffCode optionalNextDC, + boolean noSuchACIP[], + int howManyGlyphsUsed[]) { String hashKey = getHashKeyForGlyph(dc); + if (null != hashKey && hashKey.equals("@")) { // hard-coded EWTS value + String nextHashKey + = ((null == optionalNextDC) + ? null : getHashKeyForGlyph(optionalNextDC)); + if (null != nextHashKey && nextHashKey.equals("#")) { // hard-coded EWTS value + howManyGlyphsUsed[0] = 2; + return "#"; // hard-coded ACIP value + } else { + howManyGlyphsUsed[0] = 1; + return "*"; // hard-coded ACIP value + } + } + howManyGlyphsUsed[0] = 1; String ans = (hashKey == null) ? null : acipForGlyph(hashKey); if (hashKey == null || ans == null) { noSuchACIP[0] = true;