TMW->ACIP is much improved. V and W were confused, # and * were
confused; many glyphs that should have yielded errors did not. I've added a test case that transforms every TMW glyph (save the one with no TM mapping) to ACIP. I hand-checked that the output was correct. ACIP->TMW is fixed for # and *. I never noticed it, but each needed an extra swoosh (U+0F05). Round-tripping would be good, as would testing real-world use of TMW->ACIP.
This commit is contained in:
parent
244a9d1370
commit
1bfd3772e6
10 changed files with 1110 additions and 85 deletions
|
@ -112,17 +112,28 @@ public class TGCPair implements THDLWylieConstants {
|
|||
public String getACIP() {
|
||||
return getACIP(null);
|
||||
}
|
||||
/** Like {@link #getWylie(String)} but for ACIP transliteration, not EWTS. */
|
||||
/** Like {@link #getWylie(String)} but for ACIP transliteration,
|
||||
not EWTS. */
|
||||
public String getACIP(String previousTranslitIfAppendaged) {
|
||||
// DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
|
||||
StringBuffer b = new StringBuffer();
|
||||
if (consonantWylie != null) {
|
||||
String consonantACIP
|
||||
= org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(consonantWylie);
|
||||
= null;
|
||||
if ("w".equals(consonantWylie)
|
||||
&& (SANSKRIT_WITHOUT_VOWEL == classification
|
||||
|| SANSKRIT_WITH_VOWEL == classification))
|
||||
consonantACIP = "V";
|
||||
else
|
||||
consonantACIP
|
||||
= org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(consonantWylie);
|
||||
if (null == consonantACIP) {
|
||||
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie);
|
||||
if (null != consonantWylie && consonantWylie.startsWith("R+"))
|
||||
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie, " because the ACIP R+... could imply the short superscribed form, but this most likely intends the full form (i.e., Unicode character U+0F6A)");
|
||||
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie, "");
|
||||
} else {
|
||||
// Think of pa'am... we want 'am, not 'm; 'ang, not 'ng. But we want 'ur, not 'uar, 'is, not 'ias.
|
||||
// Think of pa'am... we want 'am, not 'm; 'ang, not
|
||||
// 'ng. But we want 'ur, not 'uar, 'is, not 'ias.
|
||||
if (null != previousTranslitIfAppendaged
|
||||
&& "'".equals(previousTranslitIfAppendaged)) {
|
||||
b.append("A");
|
||||
|
@ -140,7 +151,7 @@ public class TGCPair implements THDLWylieConstants {
|
|||
String vowelACIP
|
||||
= org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(vowelWylie);
|
||||
if (null == vowelACIP) {
|
||||
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + vowelWylie);
|
||||
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + vowelWylie, "");
|
||||
} else {
|
||||
b.append(vowelACIP);
|
||||
}
|
||||
|
|
|
@ -900,10 +900,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
/** Returns "a"/"A", unless wylie (which really is EWTS, not ACIP)
|
||||
is already "a". */
|
||||
private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
|
||||
if (wylie.equals(ACHEN))
|
||||
return ""; // it's a, not aa, for achen alone.
|
||||
else
|
||||
return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
|
||||
if (wylie.equals(ACHEN) && EWTSNotACIP) {
|
||||
/* it's EWTS{a}, not EWTS{aa}, for achen alone. But it's
|
||||
ACIP{AA}. */
|
||||
return "";
|
||||
} else
|
||||
return ((EWTSNotACIP)
|
||||
? WYLIE_aVOWEL : "A" /* hard-coded ACIP constant */);
|
||||
}
|
||||
|
||||
private static String unambiguousPostAVowelTranslit(boolean EWTSNotACIP,
|
||||
|
@ -929,7 +932,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
* EWTSNotACIP is true, or the ACIP otherwise.
|
||||
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
|
||||
* you want ACIP
|
||||
* @param dcs an array of glyphs
|
||||
* @param dcs an array of TMW glyphs
|
||||
* @param noSuch an array which will not be touched if this is
|
||||
* successful; however, if there is no THDL Extended Wylie/ACIP
|
||||
* corresponding to these glyphs, then noSuch[0] will be set to true
|
||||
|
@ -959,9 +962,9 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
// DLC FIXME: {H}, U+0F7F, is part of a grapheme cluster!
|
||||
// David Chapman and I both need a comprehensive list of these
|
||||
// guys. Get it from Unicode 4.0 spec?
|
||||
/** Scans the glyphs in glyphList and creates the returned list of
|
||||
grapheme clusters based on them. A grapheme cluster is a
|
||||
consonant or consonant stack with optional adornment or a
|
||||
/** Scans the TMW glyphs in glyphList and creates the returned
|
||||
list of grapheme clusters based on them. A grapheme cluster
|
||||
is a consonant or consonant stack with optional adornment or a
|
||||
number (possibly super- or subscribed) or some other glyph
|
||||
alone. */
|
||||
private static TGCList breakTshegBarIntoGraphemeClusters(java.util.List glyphList,
|
||||
|
@ -986,7 +989,12 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie);
|
||||
boolean buildingUpSanskritNext = false;
|
||||
if ((buildingUpSanskritNext
|
||||
= TibetanMachineWeb.isWylieSanskritConsonantStack(wylie))
|
||||
= (TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)
|
||||
||
|
||||
/* U+0FAD, which should become ACIP "V", not "W",
|
||||
though the EWTS is "w" just as it is for
|
||||
TMW(fontNum==1).53: */
|
||||
(8 == dc.getFontNum() && 69 == dc.getCharNum())))
|
||||
|| TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)) {
|
||||
if (buildingUpVowel.length() > 0 || null != nonVowelWylie) {
|
||||
gcs.add(new TGCPair(nonVowelWylie,
|
||||
|
@ -1612,7 +1620,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
ArrayList glyphList = new ArrayList();
|
||||
StringBuffer translitBuffer = new StringBuffer();
|
||||
|
||||
// DLC FIXME: " " should become " ", and test with ACIP # and *.
|
||||
// DLC FIXME: " " should become " " for ACIP
|
||||
for (int i=0; i<dcs.length; i++) {
|
||||
char ch = dcs[i].getCharacter();
|
||||
int k = dcs[i].getCharNum();
|
||||
|
@ -1650,13 +1658,18 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
((i+1<dcs.length)
|
||||
? dcs[i+1]
|
||||
: null),
|
||||
((i+2<dcs.length)
|
||||
? dcs[i+2]
|
||||
: null),
|
||||
noSuch,
|
||||
howManyConsumed);
|
||||
if (howManyConsumed[0] == 1) {
|
||||
// nothing to do
|
||||
} else {
|
||||
ThdlDebug.verify(howManyConsumed[0] == 2);
|
||||
} else if (howManyConsumed[0] == 2) {
|
||||
++i;
|
||||
} else {
|
||||
ThdlDebug.verify(howManyConsumed[0] == 3);
|
||||
++i; ++i;
|
||||
}
|
||||
}
|
||||
if (TibetanMachineWeb.isWyliePunc(wylie)
|
||||
|
@ -1683,8 +1696,9 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
warnings.append("The stretch of Tibetan ended without final punctuation.");
|
||||
}
|
||||
|
||||
if (translitBuffer.length() > 0)
|
||||
if (translitBuffer.length() > 0) {
|
||||
return translitBuffer.toString();
|
||||
}
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -966,9 +966,12 @@ public static boolean isWylieTibetanConsonantOrConsonantStack(String s) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns true if and only if s is the THDL Extended Wylie for a
|
||||
* Sanskrit multi-consonant stack.
|
||||
*/
|
||||
* Returns true if and only if s is necessarily the THDL Extended Wylie
|
||||
* for a Sanskrit (non-Tibetan, to be more correct) multi-consonant
|
||||
* stack. If s is "w", then it might be the EWTS for TMW7.69, and that
|
||||
* glyph is only used in non-Tibetan stacks, but "w" also stands for
|
||||
* TMW.53, which is Tibetan, so this will return false for such a
|
||||
* glyph. */
|
||||
public static boolean isWylieSanskritConsonantStack(String s) {
|
||||
return sanskritStackSet.contains(s);
|
||||
}
|
||||
|
@ -1909,11 +1912,18 @@ public static String wylieForGlyph(String hashKey) {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
// DLC DOC
|
||||
/** Returns the ACIP transliteration for a glyph with hash key
|
||||
hashKey, or returns null if there is none. */
|
||||
private static String acipForGlyph(String hashKey) {
|
||||
String ACIP // DLC FIXME: test this.
|
||||
= org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey);
|
||||
return ACIP;
|
||||
if (1 == hashKey.length()
|
||||
// ~X is a special case because the EWTS is 2 characters in
|
||||
// length
|
||||
|| "~X".equals(hashKey)) // hard-coded EWTS value
|
||||
return org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(hashKey);
|
||||
else
|
||||
// else we are not able to use it because it's not smart
|
||||
// about stacks (e.g., W+W)
|
||||
return org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey);
|
||||
}
|
||||
|
||||
/** Error that appears in a document when some TMW cannot be
|
||||
|
@ -1927,15 +1937,15 @@ private static String getTMWToWylieErrorString(DuffCode dc) {
|
|||
}
|
||||
|
||||
/** Error that appears in a document when some TMW cannot be
|
||||
* transcribed in ACIP. This error message is
|
||||
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET), so change
|
||||
* them both when you change this. */
|
||||
static String getTMWToACIPErrorString(String it) {
|
||||
return "[# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert " + it + " to ACIP. Please transcribe this yourself.]";
|
||||
* transcribed in ACIP. This error message is documented in
|
||||
* www/htdocs/TMW_or_TM_To_X_Converters.html, so change them both
|
||||
* when you change this. */
|
||||
static String getTMWToACIPErrorString(String it, String explanation) {
|
||||
return "[# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert " + it + " to ACIP" + explanation + ". Please transcribe this yourself.]";
|
||||
}
|
||||
|
||||
private static String getTMWToACIPErrorString(DuffCode dc) {
|
||||
return getTMWToACIPErrorString(dc.toString(true));
|
||||
private static String getTMWToACIPErrorString(DuffCode dc, String explanation) {
|
||||
return getTMWToACIPErrorString(dc.toString(true), explanation);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1979,65 +1989,103 @@ public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) {
|
|||
}
|
||||
|
||||
/** Returns ACIP transliteration or an error message stating why no
|
||||
ACIP transliteration exists for the sole glyph dc or the two
|
||||
glyphs dc and optionalNextDC as a whole. noSuchACIP[0] will be
|
||||
set (to true) if and only if there is no ACIP representation for
|
||||
dc; in that case, an error message is returned rather than valid
|
||||
ACIP. optionalNextDC should be null if there is no context
|
||||
information available (such as if dc is the last DuffCode being
|
||||
converted from TMW to ACIP) or the DuffCode following dc
|
||||
otherwise. If the ACIP (or error message) returned captures both
|
||||
dc and the nonnull optionalNextDC, then howManyGlyphsUsed[0] will
|
||||
be set to 2, otherwise it will be set to 1.
|
||||
ACIP transliteration exists for one, two, or three TMW glyphs.
|
||||
This gobbles up three TMW glyphs when and only when "#" is
|
||||
returned; this gobbles up two TMW glyphs when and only when "@" is
|
||||
returned; this gobbles up one TMW glyph otherwise. The number
|
||||
gobbled is stored into howManyGlyphsUsed[0]. Always pass in as
|
||||
many glyphs as possible.
|
||||
|
||||
<p>noSuchACIP[0] will be set (to true) if and only if there is no
|
||||
ACIP representation; in that case, an error message is returned
|
||||
rather than valid ACIP. dc2 and/or dc3 should be null if there is
|
||||
no context information available (i.e., if dc1 or dc2 is the last
|
||||
DuffCode being converted from TMW to ACIP). Otherwise, dc2 should
|
||||
be the DuffCode following dc1 and dc3 should be the DuffCode
|
||||
following dc2. If the ACIP (or error message) returned captures
|
||||
all of dc1, the (nonnull) dc2, and the (nonnull) dc3, then
|
||||
howManyGlyphsUsed[0] will be set to 3. If the ACIP (or error
|
||||
message) returned captures both dc1 and the nonnull dc2, then
|
||||
howManyGlyphsUsed[0] will be set to 2. Otherwise it will be set
|
||||
to 1.
|
||||
|
||||
<p>This would be more straightforward if it were not the case that
|
||||
a TMW->ACIP conversion requires context information in the case
|
||||
of U+0F04 and U+0F05. Because it does, two DuffCodes, not one,
|
||||
of U+0F04 and U+0F05. Because it does, three DuffCodes, not one,
|
||||
must be passed in whenever possible.
|
||||
|
||||
<p>We opt to treat a lone U+0F05 as an error in TMW->ACIP
|
||||
conversions rather than return the pseudo-ACIP Unicode character
|
||||
escape for U+0F05. After all, the conversion is TMW->ACIP, not
|
||||
TMW->pseudo-ACIP.
|
||||
<p>We opt to treat a lone U+0F05 or U+0F04 as an error in
|
||||
TMW->ACIP conversions rather than return the pseudo-ACIP
|
||||
Unicode character escape. After all, the conversion is
|
||||
TMW->ACIP, not TMW->pseudo-ACIP.
|
||||
|
||||
@return error message or valid ACIP, never pseudo-ACIP like
|
||||
Unicode character escapes
|
||||
@param dc the leftmost DuffCode if optionalNextDC is nonnull, or
|
||||
the sole DuffCode
|
||||
@param optionalNextDC null if dc is the last (rightmost) DuffCode
|
||||
in the sequence, or the DuffCode following dc. If you pass in dc
|
||||
equal to the DuffCode for U+0F04, and optionalNextDC null, then
|
||||
"*" will be returned, so don't leave this out unless dc is the
|
||||
rightmost DuffCode.
|
||||
@param dc1 the leftmost TMW DuffCode if dc2 is nonnull,
|
||||
or the sole TMW DuffCode
|
||||
@param dc2 null if dc1 is the last (rightmost) TMW DuffCode in the
|
||||
sequence, or the TMW DuffCode following dc1. If you pass in dc1
|
||||
equal to the TMW DuffCode for U+0F04, and dc2 null, then "*" will
|
||||
be returned, so don't leave this out unless dc1 is the rightmost
|
||||
TMW DuffCode.
|
||||
@param dc3 null if dc2 is null or is the last (rightmost) TMW
|
||||
DuffCode in the sequence, or the TMW DuffCode following dc2
|
||||
otherwise.
|
||||
@param noSuchACIP an array whose first element will be set to true
|
||||
if and only if an error message is returned instead of valid ACIP;
|
||||
the first element is never set to false, so nominally caller will
|
||||
initialize the first element to false
|
||||
@param howManyGlyphsUsed an array whose first element will be set
|
||||
to 2 if valid ACIP that describes both dc and optionalNextDC is
|
||||
returned, or 1 otherwise */
|
||||
public static String getACIPForGlyph(DuffCode dc,
|
||||
DuffCode optionalNextDC,
|
||||
to 3 if valid ACIP that describes dc1, dc2, and dc3 is returned, to
|
||||
2 if valid ACIP that describes both dc1 and dc2 is returned, or to
|
||||
1 otherwise */
|
||||
public static String getACIPForGlyph(DuffCode dc1,
|
||||
DuffCode dc2,
|
||||
DuffCode dc3,
|
||||
boolean noSuchACIP[],
|
||||
int howManyGlyphsUsed[]) {
|
||||
String hashKey = getHashKeyForGlyph(dc);
|
||||
|
||||
// DLC FIXME: TMW.53 is probably going to come out all wrong (VA
|
||||
// vs. WA) from this function, but
|
||||
// ACIPRules.getACIPForEWTS(String) seems to come through... will
|
||||
// it always?
|
||||
|
||||
String hashKey = getHashKeyForGlyph(dc1);
|
||||
if (null != hashKey && hashKey.equals("@")) { // hard-coded EWTS value
|
||||
String nextHashKey
|
||||
= ((null == optionalNextDC)
|
||||
? null : getHashKeyForGlyph(optionalNextDC));
|
||||
= ((null == dc2)
|
||||
? null : getHashKeyForGlyph(dc2));
|
||||
if (null != nextHashKey && nextHashKey.equals("#")) { // hard-coded EWTS value
|
||||
String nextNextHashKey
|
||||
= ((null == dc3)
|
||||
? null : getHashKeyForGlyph(dc3));
|
||||
if (null != nextNextHashKey && nextNextHashKey.equals("#")) { // hard-coded EWTS value
|
||||
howManyGlyphsUsed[0] = 3;
|
||||
return "#"; // hard-coded ACIP value
|
||||
}
|
||||
howManyGlyphsUsed[0] = 2;
|
||||
return "#"; // hard-coded ACIP value
|
||||
} else {
|
||||
howManyGlyphsUsed[0] = 1;
|
||||
return "*"; // hard-coded ACIP value
|
||||
}
|
||||
} // else fall through
|
||||
}
|
||||
if (null != hashKey && hashKey.equals("@#")) { // hard-coded EWTS value
|
||||
String nextHashKey
|
||||
= ((null == dc2)
|
||||
? null : getHashKeyForGlyph(dc2));
|
||||
if (null != nextHashKey && nextHashKey.equals("#")) { // hard-coded EWTS value
|
||||
howManyGlyphsUsed[0] = 2; // not 3
|
||||
return "#"; // hard-coded ACIP value
|
||||
}
|
||||
howManyGlyphsUsed[0] = 1; // not 2
|
||||
return "*"; // hard-coded ACIP value
|
||||
}
|
||||
|
||||
howManyGlyphsUsed[0] = 1;
|
||||
String ans = (hashKey == null) ? null : acipForGlyph(hashKey);
|
||||
if (hashKey == null || ans == null) {
|
||||
if (null == ans) {
|
||||
noSuchACIP[0] = true;
|
||||
return getTMWToACIPErrorString(dc);
|
||||
if (null != hashKey && hashKey.startsWith("R+"))
|
||||
return getTMWToACIPErrorString(dc1, " because the ACIP R+... could imply the short superscribed form, but this most likely intends the full form (i.e., Unicode character U+0F6A)");
|
||||
return getTMWToACIPErrorString(dc1, "");
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
// glyphs from TMW. 0F6A is not listed here (DLC FIXME: should it be?),
|
||||
// but the glyph for it is the glyph for 0F62.
|
||||
//
|
||||
// The EWTS is not a unique key -- see "r", for example.
|
||||
//
|
||||
// DuffPaneTest ensures that the na-ro column truly contains na-ros,
|
||||
// by the way.
|
||||
//
|
||||
|
@ -70,7 +72,8 @@ __TILDE__X~102,5~~9,102~~~~~~~0F35
|
|||
// though, and we let it become U+0F7E when you convert TMW->Unicode.
|
||||
// That is, we treat them as interchangeable except for in TMW->TM
|
||||
// mappings, where [8,91] does not map to any TM glyph (though you
|
||||
// could argue that it should become what [8,90] becomes).
|
||||
// could argue that it should become what [8,90] becomes -- DLC
|
||||
// FIXME).
|
||||
M~~~8,91~~~~~~~0F7E
|
||||
__TILDE__M~241,1~~8,94~~~~~~~0F83
|
||||
|
||||
|
|
|
@ -628,9 +628,16 @@ public class ACIPConverter {
|
|||
tdocLocation[0] += s.getText().length();
|
||||
continue; // FIXME: this means the unicode above doesn't go into the output if null != writer && null != tdoc?
|
||||
} else {
|
||||
String wy = ACIPRules.getWylieForACIPOther(s.getText());
|
||||
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
|
||||
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
|
||||
if ("#".equals(s.getText())) { // hard-coded ACIP value
|
||||
duff = new Object[] {
|
||||
TibetanMachineWeb.getGlyph("@#"),
|
||||
TibetanMachineWeb.getGlyph("#")
|
||||
}; // hard-coded EWTS values
|
||||
} else {
|
||||
String wy = ACIPRules.getWylieForACIPOther(s.getText());
|
||||
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
|
||||
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -157,6 +157,9 @@ public class ACIPRules {
|
|||
getWylieForACIPOther(null);
|
||||
getWylieForACIPVowel(null);
|
||||
String ans = (String)wylieToACIP.get(EWTS);
|
||||
boolean useCapitalW = false;
|
||||
if (EWTS.startsWith("w"))
|
||||
useCapitalW = true; // We want W+NA, not V+NA; we want WA, not VA.
|
||||
if (null == ans) {
|
||||
StringBuffer finalAns = new StringBuffer(EWTS.length());
|
||||
StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
|
||||
|
@ -182,9 +185,14 @@ public class ACIPRules {
|
|||
if (null == part) return null;
|
||||
finalAns.append(part);
|
||||
}
|
||||
if (useCapitalW)
|
||||
finalAns.setCharAt(0, 'W');
|
||||
return finalAns.toString();
|
||||
}
|
||||
return ans;
|
||||
if (useCapitalW)
|
||||
return "W" + ans.substring(1);
|
||||
else
|
||||
return ans;
|
||||
}
|
||||
|
||||
/** Registers acip->wylie mappings in toWylie; registers
|
||||
|
@ -193,6 +201,12 @@ public class ACIPRules {
|
|||
toWylie.put(ACIP, EWTS);
|
||||
if (null == wylieToACIP) {
|
||||
wylieToACIP = new HashMap(75);
|
||||
|
||||
// We don't want to put "/" in toWylie:
|
||||
wylieToACIP.put("(", "/");
|
||||
wylieToACIP.put(")", "/");
|
||||
wylieToACIP.put("?", "\\");
|
||||
|
||||
wylieToACIP.put("_", " "); // oddball.
|
||||
wylieToACIP.put("o'i", "O'I"); // oddball for TMW9.61.
|
||||
}
|
||||
|
@ -307,14 +321,20 @@ public class ACIPRules {
|
|||
if (acipOther2wylie == null) {
|
||||
acipOther2wylie = new HashMap(20);
|
||||
|
||||
// don't use putMapping for this. We don't want TMW->ACIP
|
||||
// to produce "." for a U+0F0C because ACIP doesn't say
|
||||
// that "." means U+0F0C. It just seems to in practice
|
||||
// for ACIP Release IV texts.
|
||||
acipOther2wylie.put(".", "*");
|
||||
|
||||
putMapping(acipOther2wylie, "m", "M");
|
||||
putMapping(acipOther2wylie, ":", "H");
|
||||
putMapping(acipOther2wylie, ",", "/");
|
||||
putMapping(acipOther2wylie, " ", " ");
|
||||
putMapping(acipOther2wylie, ".", "*");
|
||||
putMapping(acipOther2wylie, "|", "|");
|
||||
putMapping(acipOther2wylie, ";", "|");
|
||||
putMapping(acipOther2wylie, "`", "!");
|
||||
putMapping(acipOther2wylie, ";", ";");
|
||||
putMapping(acipOther2wylie, "*", "@");
|
||||
putMapping(acipOther2wylie, "#", "@#");
|
||||
putMapping(acipOther2wylie, "*", "@#");
|
||||
// There is no glyph in TMW with the EWTS @##, so we don't do this: putMapping(acipOther2wylie, "#", "@##");
|
||||
putMapping(acipOther2wylie, "%", "~X");
|
||||
putMapping(acipOther2wylie, "o", "X");
|
||||
putMapping(acipOther2wylie, "&", "&");
|
||||
|
|
|
@ -359,6 +359,7 @@ class TParseTree {
|
|||
}
|
||||
}
|
||||
if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
|
||||
// DLC FIXME: gives a false positive warning for Rsh
|
||||
hasAmbiguousConsonant = true;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue