diff --git a/source/org/thdl/tib/input/DuffPaneTest.java b/source/org/thdl/tib/input/DuffPaneTest.java index 11e7020..72ffe9a 100644 --- a/source/org/thdl/tib/input/DuffPaneTest.java +++ b/source/org/thdl/tib/input/DuffPaneTest.java @@ -1321,6 +1321,36 @@ public class DuffPaneTest extends DuffPaneTestBase { keyboard really must use its technology. */ } + + /** Tests this part of bug 998476: + +
+TMW -> EWTS conversion errors
+The following incorrect conversion are happening:
+
+dga'o -> dag'o
+'da'i -> 'ad'i
+
+'da'i should produce 'da'i since the genitive particle 'i can
+only be appended to syllables that end with vowel or
+with a chung ('). For instance sgra + 'i = sgra'i, nam
+mkha' + 'i = nam mkha'i. The case here is 'da' + 'i = 'da'i.
+syllable 'ad can't take the genitive 'i, so 'ad'i is invalid.
+
+Of course this is a hypothetical syllable with no meaning,
+but following the rules 'ad would be correct. 'da would be
+mistaken. "In two-lettered words, the first is always the
+root letter." (losang thonden's modern tibetan language,
+pag 41).
+
+ + */ + public void testBug998476() { + enableEWTSKeyboard(); + e("dga'o"); + e("'da'i"); + } + } // FIXME: EWTS needs a list of "native" stacks in it. diff --git a/source/org/thdl/tib/input/TinyTest.java b/source/org/thdl/tib/input/TinyTest.java index 691ca08..b6a68a2 100644 --- a/source/org/thdl/tib/input/TinyTest.java +++ b/source/org/thdl/tib/input/TinyTest.java @@ -41,35 +41,12 @@ public class TinyTest extends DuffPaneTestBase { super(a0); } - /** Tests this part of bug 998476: - -
-TMW -> EWTS conversion errors
-The following incorrect conversion are happening:
-
-dga'o -> dag'o
-bsad -> bas.d
-'da'i -> 'ad'i
-
-'da'i should produce 'da'i since the genitive particle 'i can
-only be appended to syllables that end with vowel or
-with a chung ('). For instance sgra + 'i = sgra'i, nam
-mkha' + 'i = nam mkha'i. The case here is 'da' + 'i = 'da'i.
-syllable 'ad can't take the genitive 'i, so 'ad'i is invalid.
-
-Of course this is a hypothetical syllable with no meaning,
-but following the rules 'ad would be correct. 'da would be
-mistaken. "In two-lettered words, the first is always the
-root letter." (losang thonden's modern tibetan language,
-pag 41).
-
- + /** Tests part of bug 998476. */ public void testBug998476() { enableEWTSKeyboard(); - e("dga'o", "dag'o"); // FIXME: this is a bug - e("bsad", "bas.d"); // FIXME: this is a bug - e("'da'i", "'ad'i"); // FIXME: this is a bug + // DLC FIXME 998476: make e("bsad") pass. + e("lM"); } } diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index be4c4bb..8fc3625 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -945,7 +945,7 @@ public class TibTextUtils implements THDLWylieConstants { TranslitList ans = getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings); if (debug && warnings.length() > 0) - System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings); + System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings); return ans; } @@ -1327,8 +1327,6 @@ public class TibTextUtils implements THDLWylieConstants { @@ -1374,6 +1372,7 @@ public class TibTextUtils implements THDLWylieConstants { TGCList gcs = breakTshegBarIntoGraphemeClusters(glyphList, noSuch); String candidateType = getClassificationOfTshegBar(gcs, warnings, false); + if (debug) System.out.println("DEBUG: tsheg bar classification is " + candidateType); int sz = gcs.size(); if (candidateType == "invalid" || candidateType == "single-sanskrit-gc") { @@ -1422,7 +1421,9 @@ public class TibTextUtils implements THDLWylieConstants { int leftover = sz + 1; // Appendaged vs. not appendaged? it affects nothing at - // this stage except for pa'm vs. pa'am. + // this stage except for pa'm vs. pa'am and + // appendaged-prefix/root-root/suffix (e.g., 'ad'i + // (incorrect) vs. 'da'i (correct)). boolean appendaged = (candidateType.startsWith("appendaged-")); candidateType = getCandidateTypeModuloAppendage(candidateType); @@ -1512,7 +1513,8 @@ public class TibTextUtils implements THDLWylieConstants { fontSize); } } else if ("root" == candidateType - || "prefix/root-root/suffix" == candidateType + || (!appendaged + && "prefix/root-root/suffix" == candidateType) || "prefix/root" == candidateType || "root-suffix-postsuffix" == candidateType || "root-suffix" == candidateType) { @@ -1526,7 +1528,7 @@ public class TibTextUtils implements THDLWylieConstants { == ((TGCPair)gcs.get(0)).classification); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1), fontSize); - if (debug) System.out.println("DEBUG: appending vowel"); + if (debug) System.out.println("DEBUG: appending vowel 2"); } else { if (debug) System.out.println("DEBUG: already has vowel 2"); } @@ -1545,6 +1547,8 @@ public class TibTextUtils implements THDLWylieConstants { } } else if ("prefix-root-suffix" == candidateType || "prefix-root" == candidateType + || (appendaged + && "prefix/root-root/suffix" == candidateType) || "prefix-root-suffix-postsuffix" == candidateType) { String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String wylie2 = ((TGCPair)gcs.get(1)).getWylie(); @@ -1568,7 +1572,7 @@ public class TibTextUtils implements THDLWylieConstants { != TGCPair.CONSONANTAL_WITH_VOWEL) { ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL == ((TGCPair)gcs.get(1)).classification); - if (debug) System.out.println("DEBUG: appending vowel"); + if (debug) System.out.println("DEBUG: appending vowel 1"); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2), fontSize); } else { @@ -1648,22 +1652,16 @@ public class TibTextUtils implements THDLWylieConstants { * @return the Extended Wylie/ACIP corresponding to these glyphs (with * font size info), or null */ private static TranslitList getTranslitImplementation(boolean EWTSNotACIP, - SizedDuffCode[] dcs, - boolean noSuch[], - StringBuffer warnings) { - if (dcs.length == 0) - return null; - + SizedDuffCode[] dcs, + boolean noSuch[], + StringBuffer warnings) { + // DLC FIXME: " " should become " " for ACIP ArrayList glyphList = new ArrayList(); TranslitList translitBuffer = new TranslitList(); - - // DLC FIXME: " " should become " " for ACIP for (int i = 0; i < dcs.length; i++) { char ch = dcs[i].getDuffCode().getCharacter(); - int k = dcs[i].getDuffCode().getCharNum(); int fsz = dcs[i].getFontSize(); - - if (k < 32) { + if ((int)ch < 32) { // 32 is space, ' ' if (!glyphList.isEmpty()) { getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch, warnings, translitBuffer); @@ -1671,7 +1669,6 @@ public class TibTextUtils implements THDLWylieConstants { if (null != warnings) warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first."); } - // In ACIP, \n\n (or \r\n\r\n with DOS line feeds) // indicates a real line break. if (!EWTSNotACIP && '\n' == ch) { @@ -1682,7 +1679,7 @@ public class TibTextUtils implements THDLWylieConstants { translitBuffer.append(ch, fsz); } translitBuffer.append(ch, fsz); - } else { + } else { // (int)ch >= 32 String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i].getDuffCode(), noSuch); @@ -1691,9 +1688,7 @@ public class TibTextUtils implements THDLWylieConstants { // U+0F04 and U+0F05 -- these require lookahead to // see if the ACIP is # (two shishes) or * (one // swish) - int howManyConsumed[] = new int[] { -1 /* invalid */ }; - acip = TibetanMachineWeb.getACIPForGlyph(dcs[i].getDuffCode(), ((i+1= 1); + i += howManyConsumed[0] - 1; } if (TibetanMachineWeb.isWyliePunc(wylie) && !TibetanMachineWeb.isWylieAdornment(wylie)) { @@ -1725,10 +1715,9 @@ public class TibTextUtils implements THDLWylieConstants { glyphList.add(dcs[i]); } } - } + } // for // replace remaining TMW with transliteration - if (!glyphList.isEmpty()) { getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch, warnings, translitBuffer); @@ -1736,12 +1725,7 @@ public class TibTextUtils implements THDLWylieConstants { if (null != warnings) warnings.append("The stretch of Tibetan ended without final punctuation."); } - - if (translitBuffer.length() > 0) { - return translitBuffer; - } else { - return null; - } + return ((translitBuffer.length() > 0) ? translitBuffer : null); } /** Returns "root" instead of "appendaged-root", for example. */