Fixed the worst TMW->ACIP bug, the one regarding U+0F04 and U+0F05.
TMW->EWTS requires no context information, but TMW->ACIP does.
This commit is contained in:
parent
9e7ccf2894
commit
56a02ba41d
2 changed files with 76 additions and 3 deletions
|
@ -1638,7 +1638,27 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
translitBuffer.append(ch);
|
translitBuffer.append(ch);
|
||||||
} else {
|
} else {
|
||||||
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
|
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
|
||||||
String acip = EWTSNotACIP ? null : TibetanMachineWeb.getACIPForGlyph(dcs[i], noSuch);
|
String acip = null;
|
||||||
|
if (!EWTSNotACIP) {
|
||||||
|
// U+0F04 and U+0F05 -- these require lookahead to
|
||||||
|
// see if the ACIP is # (two swishes) or * (one
|
||||||
|
// swish)
|
||||||
|
|
||||||
|
int howManyConsumed[] = new int[] { -1 /* invalid */ };
|
||||||
|
|
||||||
|
acip = TibetanMachineWeb.getACIPForGlyph(dcs[i],
|
||||||
|
((i+1<dcs.length)
|
||||||
|
? dcs[i+1]
|
||||||
|
: null),
|
||||||
|
noSuch,
|
||||||
|
howManyConsumed);
|
||||||
|
if (howManyConsumed[0] == 1) {
|
||||||
|
// nothing to do
|
||||||
|
} else {
|
||||||
|
ThdlDebug.verify(howManyConsumed[0] == 2);
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (TibetanMachineWeb.isWyliePunc(wylie)
|
if (TibetanMachineWeb.isWyliePunc(wylie)
|
||||||
&& !TibetanMachineWeb.isWylieAdornment(wylie)) {
|
&& !TibetanMachineWeb.isWylieAdornment(wylie)) {
|
||||||
if (!glyphList.isEmpty()) {
|
if (!glyphList.isEmpty()) {
|
||||||
|
|
|
@ -1978,9 +1978,62 @@ public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) {
|
||||||
return wylieForGlyph(hashKey);
|
return wylieForGlyph(hashKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
// DLC DOC
|
/** Returns ACIP transliteration or an error message stating why no
|
||||||
public static String getACIPForGlyph(DuffCode dc, boolean noSuchACIP[]) {
|
ACIP transliteration exists for the sole glyph dc or the two
|
||||||
|
glyphs dc and optionalNextDC as a whole. noSuchACIP[0] will be
|
||||||
|
set (to true) if and only if there is no ACIP representation for
|
||||||
|
dc; in that case, an error message is returned rather than valid
|
||||||
|
ACIP. optionalNextDC should be null if there is no context
|
||||||
|
information available (such as if dc is the last DuffCode being
|
||||||
|
converted from TMW to ACIP) or the DuffCode following dc
|
||||||
|
otherwise. If the ACIP (or error message) returned captures both
|
||||||
|
dc and the nonnull optionalNextDC, then howManyGlyphsUsed[0] will
|
||||||
|
be set to 2, otherwise it will be set to 1.
|
||||||
|
|
||||||
|
<p>This would be more straightforward if it were not the case that
|
||||||
|
a TMW->ACIP conversion requires context information in the case
|
||||||
|
of U+0F04 and U+0F05. Because it does, two DuffCodes, not one,
|
||||||
|
must be passed in whenever possible.
|
||||||
|
|
||||||
|
<p>We opt to treat a lone U+0F05 as an error in TMW->ACIP
|
||||||
|
conversions rather than return the pseudo-ACIP Unicode character
|
||||||
|
escape for U+0F05. After all, the conversion is TMW->ACIP, not
|
||||||
|
TMW->pseudo-ACIP.
|
||||||
|
|
||||||
|
@return error message or valid ACIP, never pseudo-ACIP like
|
||||||
|
Unicode character escapes
|
||||||
|
@param dc the leftmost DuffCode if optionalNextDC is nonnull, or
|
||||||
|
the sole DuffCode
|
||||||
|
@param optionalNextDC null if dc is the last (rightmost) DuffCode
|
||||||
|
in the sequence, or the DuffCode following dc. If you pass in dc
|
||||||
|
equal to the DuffCode for U+0F04, and optionalNextDC null, then
|
||||||
|
"*" will be returned, so don't leave this out unless dc is the
|
||||||
|
rightmost DuffCode.
|
||||||
|
@param noSuchACIP an array whose first element will be set to true
|
||||||
|
if and only if an error message is returned instead of valid ACIP;
|
||||||
|
the first element is never set to false, so nominally the caller will
|
||||||
|
initialize the first element to false
|
||||||
|
@param howManyGlyphsUsed an array whose first element will be set
|
||||||
|
to 2 if valid ACIP that describes both dc and optionalNextDC is
|
||||||
|
returned, or 1 otherwise */
|
||||||
|
public static String getACIPForGlyph(DuffCode dc,
|
||||||
|
DuffCode optionalNextDC,
|
||||||
|
boolean noSuchACIP[],
|
||||||
|
int howManyGlyphsUsed[]) {
|
||||||
String hashKey = getHashKeyForGlyph(dc);
|
String hashKey = getHashKeyForGlyph(dc);
|
||||||
|
if (null != hashKey && hashKey.equals("@")) { // hard-coded EWTS value
|
||||||
|
String nextHashKey
|
||||||
|
= ((null == optionalNextDC)
|
||||||
|
? null : getHashKeyForGlyph(optionalNextDC));
|
||||||
|
if (null != nextHashKey && nextHashKey.equals("#")) { // hard-coded EWTS value
|
||||||
|
howManyGlyphsUsed[0] = 2;
|
||||||
|
return "#"; // hard-coded ACIP value
|
||||||
|
} else {
|
||||||
|
howManyGlyphsUsed[0] = 1;
|
||||||
|
return "*"; // hard-coded ACIP value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
howManyGlyphsUsed[0] = 1;
|
||||||
String ans = (hashKey == null) ? null : acipForGlyph(hashKey);
|
String ans = (hashKey == null) ? null : acipForGlyph(hashKey);
|
||||||
if (hashKey == null || ans == null) {
|
if (hashKey == null || ans == null) {
|
||||||
noSuchACIP[0] = true;
|
noSuchACIP[0] = true;
|
||||||
|
|
Loading…
Reference in a new issue