Added UI for EWTS->Tibetan conversions. GUI is disabled except in
debug mode for now. I tested against a really simple-but-real document, found a bug with '*', and tried to implement the TMW vowel code, but I don't trust it yet. Differentiated EWTS code from ACIP where needed. Several bugs in EWTS->Tibetan have been exposed; see the TODO comments.
This commit is contained in:
parent
7198f23361
commit
2678fc134a
9 changed files with 150 additions and 34 deletions
|
@ -560,6 +560,7 @@ public final class ACIPTraits implements TTraits {
|
|||
} else if (wowel.indexOf("'I") >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
|
||||
} else {
|
||||
// TODO(dchandler): I don't understand why we go from else ifs to this form...
|
||||
if (wowel.indexOf('\'') >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
|
||||
}
|
||||
|
|
|
@ -23,6 +23,8 @@ package org.thdl.tib.text.ttt;
|
|||
import java.util.ArrayList;
|
||||
|
||||
import org.thdl.tib.text.DuffCode;
|
||||
import org.thdl.tib.text.THDLWylieConstants;
|
||||
import org.thdl.tib.text.TibTextUtils;
|
||||
import org.thdl.tib.text.TibetanMachineWeb;
|
||||
import org.thdl.util.ThdlDebug;
|
||||
|
||||
|
@ -154,7 +156,70 @@ public final class EWTSTraits implements TTraits {
|
|||
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
|
||||
|
||||
/** Adds to <code>duff</code> the entries for the vowel/adornment
 *  string <code>wowel</code>, given the entry <code>preceding</code>.
 *
 *  <p>As written below: a wowel equal to WYLIE_aVOWEL is handed to
 *  TibTextUtils.getVowel on its own.  Otherwise each vowel constant
 *  that occurs in wowel (tested with indexOf) is passed to
 *  TibTextUtils.getVowel in the fixed order of the checks;
 *  reverse_I_VOWEL is tried before I_VOWEL and reverse_i_VOWEL before
 *  i_VOWEL, and the second of each pair is used only when the first
 *  is absent.  After that, "~X" (or else "X") adds the glyph of the
 *  same name.  Finally, for any wowel, 'M' goes through
 *  TibTextUtils.getBindu and 'H' adds the "H" glyph.
 *
 *  @param duff the list that is appended to; its last element is
 *  also removed and passed to getBindu when wowel contains 'M'
 *  @param preceding passed through unchanged to every
 *  TibTextUtils.getVowel call
 *  @param wowel the string that is scanned for vowels, "~X"/"X", 'M'
 *  and 'H' */
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
|
||||
// NOTE(review): this unconditional throw looks like the pre-change
// line of the diff hunk (the hunk header shows 7 lines becoming 70).
// If it were really present, nothing below would run -- confirm
// against the committed file.
throw new Error("TODO(DLC)[EWTS->Tibetan]");
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
|
||||
|
||||
// Order matters here.
|
||||
// One-element array shared by every getVowel call below; it starts
// out false.  Presumably getVowel uses it as an in/out flag -- TODO
// confirm against TibTextUtils.
boolean context_added[] = new boolean[] { false };
|
||||
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
|
||||
} else {
|
||||
// TODO(DLC)[EWTS->Tibetan]: test vowel stacking
|
||||
if (wowel.indexOf(THDLWylieConstants.U_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
|
||||
}
|
||||
// reverse_I_VOWEL is checked first; I_VOWEL is used only when
// reverse_I_VOWEL does not occur in wowel.
if (wowel.indexOf(THDLWylieConstants.reverse_I_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_I_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.I_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.A_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
|
||||
}
|
||||
// Same pairing as above: reverse_i_VOWEL wins over i_VOWEL.
if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.e_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
|
||||
}
|
||||
// "~X" must be tested before "X", since any wowel containing
// "~X" also contains "X".
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
|
||||
duff.add(TibetanMachineWeb.getGlyph("~X"));
|
||||
} else if (wowel.indexOf("X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
|
||||
duff.add(TibetanMachineWeb.getGlyph("X"));
|
||||
}
|
||||
}
|
||||
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
|
||||
|
||||
// For 'M': take the last element off duff (if there is one) and
// hand it to getBindu; when duff is empty, getBindu receives null.
if (wowel.indexOf('M') >= 0) {
|
||||
DuffCode last = null;
|
||||
if (duff.size() > 0) {
|
||||
last = (DuffCode)duff.get(duff.size() - 1);
|
||||
duff.remove(duff.size() - 1); // getBindu will add it back...
|
||||
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
|
||||
}
|
||||
TibTextUtils.getBindu(duff, last);
|
||||
}
|
||||
if (wowel.indexOf('H') >= 0)
|
||||
duff.add(TibetanMachineWeb.getGlyph("H"));
|
||||
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g.
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]:: are bindus screwed up in the unicode output? i see (with tmuni font) lone bindus without glyphs to stack on
|
||||
}
|
||||
|
||||
public String getUnicodeForWowel(String wowel) {
|
||||
|
@ -218,12 +283,17 @@ public final class EWTSTraits implements TTraits {
|
|||
}
|
||||
|
||||
public String getUnicodeFor(String l, boolean subscribed) {
|
||||
|
||||
|
||||
// First, handle "\u0f71\u0f84\u0f86", "", "\u0f74", etc.
|
||||
{
|
||||
boolean already_done = true;
|
||||
for (int i = 0; i < l.length(); i++) {
|
||||
if (!(l.charAt(0) >= '\u0f00' && l.charAt(0) <= '\u0fff')) {
|
||||
char ch = l.charAt(i);
|
||||
if ((ch < '\u0f00' || ch > '\u0fff')
|
||||
&& '\n' != ch
|
||||
&& '\r' != ch) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: Is this the place
|
||||
// where we want to interpret how newlines work???
|
||||
already_done = false;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -61,6 +61,9 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
|
|||
StringBuffer sb = new StringBuffer(s);
|
||||
ExpandEscapeSequences(sb);
|
||||
int sl = sb.length();
|
||||
// TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working
|
||||
// TODO(DLC)[EWTS->Tibetan]:: 'jamX 'jam~X one is not working in ->tmw mode
|
||||
// TODO(DLC)[EWTS->Tibetan]:: dzaHsogs is not working
|
||||
for (int i = 0; i < sl; i++) {
|
||||
if (isValidInsideTshegBar(sb.charAt(i))) {
|
||||
StringBuffer tbsb = new StringBuffer();
|
||||
|
@ -75,7 +78,7 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
|
|||
al.add(new TString("EWTS", tbsb.toString(),
|
||||
TString.TIBETAN_NON_PUNCTUATION));
|
||||
} else {
|
||||
if (" /;|!:=_@#$%<>()\r\n\t".indexOf(sb.charAt(i)) >= 0)
|
||||
if (" /;|!:=_@#$%<>()\r\n\t*".indexOf(sb.charAt(i)) >= 0)
|
||||
al.add(new TString("EWTS", sb.substring(i, i+1),
|
||||
TString.TIBETAN_PUNCTUATION));
|
||||
else
|
||||
|
|
|
@ -327,14 +327,16 @@ class TParseTree {
|
|||
translit,
|
||||
traits);
|
||||
} else {
|
||||
if (bestParse.hasStackWithoutVowel(pl, isLastStack)) {
|
||||
if (bestParse.hasStackWithoutVowel(traits.isACIP(),
|
||||
pl, isLastStack)) {
|
||||
if (isLastStack[0]) {
|
||||
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
|
||||
return ErrorsAndWarnings.getMessage(502, shortMessages,
|
||||
translit,
|
||||
traits);
|
||||
} else {
|
||||
throw new Error("Can't happen now that we stack greedily");
|
||||
if (traits.isACIP())
|
||||
throw new Error("Can't happen now that we stack greedily");
|
||||
}
|
||||
}
|
||||
if (ErrorsAndWarnings.isEnabled(503, warningLevel))
|
||||
|
@ -343,14 +345,16 @@ class TParseTree {
|
|||
traits);
|
||||
}
|
||||
} else {
|
||||
if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) {
|
||||
if (nip.get(0).hasStackWithoutVowel(traits.isACIP(),
|
||||
pl, isLastStack)) {
|
||||
if (isLastStack[0]) {
|
||||
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
|
||||
return ErrorsAndWarnings.getMessage(502, shortMessages,
|
||||
translit,
|
||||
traits);
|
||||
} else {
|
||||
throw new Error("Can't happen now that we stack greedily [2]");
|
||||
if (traits.isACIP())
|
||||
throw new Error("Can't happen now that we stack greedily [2]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -193,14 +193,15 @@ class TStackList {
|
|||
|
||||
/** Returns true if and only if this stack list contains a stack
|
||||
* that does not end in a vowel or disambiguator. Note that this
|
||||
* is not erroneous for legal Tibetan like {BRTAN}, where {B} has
|
||||
* is not erroneous for legal Tibetan like ACIP {BRTAN}, where {B} has
|
||||
* no vowel, but it is a warning sign for Sanskrit stacks.
|
||||
* @param isACIP true iff opl is ACIP (not EWTS)
|
||||
* @param opl the pair list from which this stack list
|
||||
* originated
|
||||
* @param isLastStack if non-null, then isLastStack[0] will be
|
||||
* set to true if and only if the very last stack is the only
|
||||
* stack not to have a vowel or disambiguator on it */
|
||||
boolean hasStackWithoutVowel(TPairList opl, boolean[] isLastStack) {
|
||||
boolean hasStackWithoutVowel(boolean isACIP, TPairList opl, boolean[] isLastStack) {
|
||||
int runningSize = 0;
|
||||
// FIXME: MARDA is MARD==MAR-D to us, but is probably MAR+DA, warn -- see 838470
|
||||
for (int i = 0; i < size(); i++) {
|
||||
|
@ -213,15 +214,16 @@ class TStackList {
|
|||
&& l.charAt(0) >= '0' && l.charAt(0) <= '9')) {
|
||||
if (null != isLastStack) {
|
||||
isLastStack[0] = (i + 1 == size());
|
||||
if (!isLastStack[0]) {
|
||||
if (!isLastStack[0] && isACIP) {
|
||||
throw new Error("But we now stack greedily!");
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (runningSize != opl.sizeMinusDisambiguators())
|
||||
if (runningSize != opl.sizeMinusDisambiguators()) {
|
||||
throw new IllegalArgumentException("runningSize = " + runningSize + "; opl.sizeMinusDisambiguators = " + opl.sizeMinusDisambiguators() + "; opl (" + opl + ") is bad for this stack list (" + toString() + ")");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ import org.thdl.tib.text.DuffCode;
|
|||
*
|
||||
* <p>It is very likely that classes that implement this interface
|
||||
* will choose to use the design pattern 'singleton'. */
|
||||
interface TTraits {
|
||||
public interface TTraits {
|
||||
/** Returns the disambiguator for this transliteration scheme,
|
||||
* which had better be a string containing just one character
|
||||
* lest {@link #disambiguatorChar()} become nonsensical for
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue