diff --git a/source/org/thdl/tib/input/DuffPaneTest.java b/source/org/thdl/tib/input/DuffPaneTest.java
index 11e7020..72ffe9a 100644
--- a/source/org/thdl/tib/input/DuffPaneTest.java
+++ b/source/org/thdl/tib/input/DuffPaneTest.java
@@ -1321,6 +1321,36 @@ public class DuffPaneTest extends DuffPaneTestBase {
keyboard really must use its technology.
*/
}
+
+ /** Tests this part of bug 998476:
+
+
+TMW -> EWTS conversion errors
+The following incorrect conversions are happening:
+
+dga'o -> dag'o
+'da'i -> 'ad'i
+
+'da'i should produce 'da'i since the genitive particle 'i can
+only be appended to syllables that end with vowel or
+with a chung ('). For instance sgra + 'i = sgra'i, nam
+mkha' + 'i = nam mkha'i. The case here is 'da' + 'i = 'da'i.
+syllable 'ad can't take the genitive 'i, so 'ad'i is invalid.
+
+Of course this is a hypothetical syllable with no meaning,
+but following the rules 'ad would be correct. 'da would be
+mistaken. "In two-lettered words, the first is always the
+root letter." (Losang Thonden's Modern Tibetan Language,
+p. 41).
+
+
+ */
+ public void testBug998476() {
+ enableEWTSKeyboard();
+ e("dga'o"); // bug report: came back as dag'o; one-arg e() presumably expects an exact round trip -- confirm in DuffPaneTestBase
+ e("'da'i"); // bug report: came back as 'ad'i; the 'i particle needs the syllable to end in a vowel or a chung
+ }
+
}
// FIXME: EWTS needs a list of "native" stacks in it.
diff --git a/source/org/thdl/tib/input/TinyTest.java b/source/org/thdl/tib/input/TinyTest.java
index 691ca08..b6a68a2 100644
--- a/source/org/thdl/tib/input/TinyTest.java
+++ b/source/org/thdl/tib/input/TinyTest.java
@@ -41,35 +41,12 @@ public class TinyTest extends DuffPaneTestBase {
super(a0);
}
- /** Tests this part of bug 998476:
-
-
-TMW -> EWTS conversion errors
-The following incorrect conversion are happening:
-
-dga'o -> dag'o
-bsad -> bas.d
-'da'i -> 'ad'i
-
-'da'i should produce 'da'i since the genitive particle 'i can
-only be appended to syllables that end with vowel or
-with a chung ('). For instance sgra + 'i = sgra'i, nam
-mkha' + 'i = nam mkha'i. The case here is 'da' + 'i = 'da'i.
-syllable 'ad can't take the genitive 'i, so 'ad'i is invalid.
-
-Of course this is a hypothetical syllable with no meaning,
-but following the rules 'ad would be correct. 'da would be
-mistaken. "In two-lettered words, the first is always the
-root letter." (losang thonden's modern tibetan language,
-pag 41).
-
-
+ /** Tests part of bug 998476.
*/
public void testBug998476() {
enableEWTSKeyboard();
- e("dga'o", "dag'o"); // FIXME: this is a bug
- e("bsad", "bas.d"); // FIXME: this is a bug
- e("'da'i", "'ad'i"); // FIXME: this is a bug
+ // DLC FIXME 998476: make e("bsad") pass.
+ e("lM"); // NOTE(review): presumably a stand-in case while e("bsad") still fails -- confirm why "lM" was chosen
}
}
diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java
index be4c4bb..8fc3625 100644
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@@ -945,7 +945,7 @@ public class TibTextUtils implements THDLWylieConstants {
TranslitList ans
= getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
if (debug && warnings.length() > 0)
- System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
+ System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
return ans;
}
@@ -1327,8 +1327,6 @@ public class TibTextUtils implements THDLWylieConstants {
- one or more numbers
- - a single, possibly adorned consonant stack
-
- a legal "tyllable" appended with zero or more particles
from the set { 'i, 'o, 'u, 'e, 'ang, 'am }
@@ -1374,6 +1372,7 @@ public class TibTextUtils implements THDLWylieConstants {
TGCList gcs
= breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
+ if (debug) System.out.println("DEBUG: tsheg bar classification is " + candidateType);
int sz = gcs.size();
if (candidateType == "invalid"
|| candidateType == "single-sanskrit-gc") {
@@ -1422,7 +1421,9 @@ public class TibTextUtils implements THDLWylieConstants {
int leftover = sz + 1;
// Appendaged vs. not appendaged? it affects nothing at
- // this stage except for pa'm vs. pa'am.
+ // this stage except for pa'm vs. pa'am and
+ // appendaged-prefix/root-root/suffix (e.g., 'ad'i
+ // (incorrect) vs. 'da'i (correct)).
boolean appendaged = (candidateType.startsWith("appendaged-"));
candidateType = getCandidateTypeModuloAppendage(candidateType);
@@ -1512,7 +1513,8 @@ public class TibTextUtils implements THDLWylieConstants {
fontSize);
}
} else if ("root" == candidateType
- || "prefix/root-root/suffix" == candidateType
+ || (!appendaged
+ && "prefix/root-root/suffix" == candidateType)
|| "prefix/root" == candidateType
|| "root-suffix-postsuffix" == candidateType
|| "root-suffix" == candidateType) {
@@ -1526,7 +1528,7 @@ public class TibTextUtils implements THDLWylieConstants {
== ((TGCPair)gcs.get(0)).classification);
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1),
fontSize);
- if (debug) System.out.println("DEBUG: appending vowel");
+ if (debug) System.out.println("DEBUG: appending vowel 2");
} else {
if (debug) System.out.println("DEBUG: already has vowel 2");
}
@@ -1545,6 +1547,8 @@ public class TibTextUtils implements THDLWylieConstants {
}
} else if ("prefix-root-suffix" == candidateType
|| "prefix-root" == candidateType
+ || (appendaged
+ && "prefix/root-root/suffix" == candidateType)
|| "prefix-root-suffix-postsuffix" == candidateType) {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
@@ -1568,7 +1572,7 @@ public class TibTextUtils implements THDLWylieConstants {
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(1)).classification);
- if (debug) System.out.println("DEBUG: appending vowel");
+ if (debug) System.out.println("DEBUG: appending vowel 1");
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2),
fontSize);
} else {
@@ -1648,22 +1652,16 @@ public class TibTextUtils implements THDLWylieConstants {
* @return the Extended Wylie/ACIP corresponding to these glyphs (with
* font size info), or null */
private static TranslitList getTranslitImplementation(boolean EWTSNotACIP,
- SizedDuffCode[] dcs,
- boolean noSuch[],
- StringBuffer warnings) {
- if (dcs.length == 0)
- return null;
-
+ SizedDuffCode[] dcs,
+ boolean noSuch[],
+ StringBuffer warnings) {
+ // DLC FIXME: " " should become " " for ACIP
ArrayList glyphList = new ArrayList();
TranslitList translitBuffer = new TranslitList();
-
- // DLC FIXME: " " should become " " for ACIP
for (int i = 0; i < dcs.length; i++) {
char ch = dcs[i].getDuffCode().getCharacter();
- int k = dcs[i].getDuffCode().getCharNum();
int fsz = dcs[i].getFontSize();
-
- if (k < 32) {
+ if ((int)ch < 32) { // 32 is space, ' '
if (!glyphList.isEmpty()) {
getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
warnings, translitBuffer);
@@ -1671,7 +1669,6 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != warnings)
warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
}
-
// In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
// indicates a real line break.
if (!EWTSNotACIP && '\n' == ch) {
@@ -1682,7 +1679,7 @@ public class TibTextUtils implements THDLWylieConstants {
translitBuffer.append(ch, fsz);
}
translitBuffer.append(ch, fsz);
- } else {
+ } else { // (int)ch >= 32
String wylie
= TibetanMachineWeb.getWylieForGlyph(dcs[i].getDuffCode(),
noSuch);
@@ -1691,9 +1688,7 @@ public class TibTextUtils implements THDLWylieConstants {
// U+0F04 and U+0F05 -- these require lookahead to
// see if the ACIP is # (two shishes) or * (one
// swish)
-
int howManyConsumed[] = new int[] { -1 /* invalid */ };
-
acip = TibetanMachineWeb.getACIPForGlyph(dcs[i].getDuffCode(),
((i+1= 1);
+ i += howManyConsumed[0] - 1;
}
if (TibetanMachineWeb.isWyliePunc(wylie)
&& !TibetanMachineWeb.isWylieAdornment(wylie)) {
@@ -1725,10 +1715,9 @@ public class TibTextUtils implements THDLWylieConstants {
glyphList.add(dcs[i]);
}
}
- }
+ } // for
// replace remaining TMW with transliteration
-
if (!glyphList.isEmpty()) {
getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
warnings, translitBuffer);
@@ -1736,12 +1725,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != warnings)
warnings.append("The stretch of Tibetan ended without final punctuation.");
}
-
- if (translitBuffer.length() > 0) {
- return translitBuffer;
- } else {
- return null;
- }
+ return ((translitBuffer.length() > 0) ? translitBuffer : null);
}
/** Returns "root" instead of "appendaged-root", for example. */