Renamed UnicodeCharToExtendedWylie to
UnicodeCodepointToThdlWylie. Added a new class, UnicodeGraphemeCluster, that reports the components of a grapheme cluster from top to bottom. It does not yet have good error checking and is not yet finished. The next step is to parse clean Unicode into GraphemeClusters. After that comes scanning dirty Unicode into best-guess GraphemeClusters, and scanning dirty Unicode to produce useful error messages.
This commit is contained in:
parent
8e8a23c6a6
commit
7ea185fa01
4 changed files with 481 additions and 69 deletions
|
@ -748,7 +748,7 @@ public class LegalTshegBar
|
|||
if (!isNominalRepresentationOfSimpleSuffix(suffix.charAt(0))) {
|
||||
return internalThrowThing(throwIfIllegal,
|
||||
"Illegal suffix -- not one of the ten legal suffixes: "
|
||||
+ UnicodeUtils.unicodeCPToString(suffix.charAt(0)));
|
||||
+ UnicodeUtils.unicodeCodepointToString(suffix.charAt(0)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -837,7 +837,7 @@ public class LegalTshegBar
|
|||
|
||||
boolean disambiguatorNeeded = false;
|
||||
char prefix = getPrefix();
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(prefix));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(prefix));
|
||||
if (!hasHeadLetter()) {
|
||||
if (EWC_ya == rootLetter) {
|
||||
if (isConsonantThatTakesYaBtags(prefix))
|
||||
|
@ -857,55 +857,55 @@ public class LegalTshegBar
|
|||
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
||||
}
|
||||
if (hasHeadLetter())
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getHeadLetter()));
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(rootLetter));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getHeadLetter()));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(rootLetter));
|
||||
if (hasSubjoinedLetter())
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getSubjoinedLetter()));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getSubjoinedLetter()));
|
||||
if (hasWaZurSubjoinedToRootLetter())
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(EWSUB_wa_zur));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(EWSUB_wa_zur));
|
||||
|
||||
// a-chung is treated, in THDL Extended Wylie, like a vowel.
|
||||
// I.e., you don't have 'pAa', you have 'pA'.
|
||||
if (hasAChungOnRootLetter()) {
|
||||
if (hasExplicitVowel()) {
|
||||
if (EWV_i == getVowel()) {
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar('\u0F73'));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint('\u0F73'));
|
||||
} else if (EWV_u == getVowel()) {
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar('\u0F75'));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint('\u0F75'));
|
||||
} else if (EWV_e == getVowel() || EWV_o == getVowel()) {
|
||||
// The exception to the rule for a-chung and vowels...
|
||||
|
||||
// DLC FIXME: are these allowed in legal Tibetan?
|
||||
// EWTS would have special cases for them if so,
|
||||
// I'd wager...
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(EW_achung));
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getVowel()));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(EW_achung));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getVowel()));
|
||||
} else {
|
||||
ThdlDebug.abort("only simple vowels occur in this class, how did this get past internalLegalityTest(..)?");
|
||||
}
|
||||
} else {
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(EW_achung));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(EW_achung));
|
||||
}
|
||||
} else {
|
||||
if (hasExplicitVowel())
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getVowel()));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getVowel()));
|
||||
else
|
||||
sb.append("a");
|
||||
}
|
||||
|
||||
if (hasSuffix()) {
|
||||
String suf = getSuffix();
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(suf.charAt(0)));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(suf.charAt(0)));
|
||||
if (suf.length() > 1) {
|
||||
// DLC assert, don't verify, that the length is two.
|
||||
// This could change if I learn of more suffix
|
||||
// particles.
|
||||
ThdlDebug.verify(2 == suf.length());
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(suf.charAt(1)));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(suf.charAt(1)));
|
||||
}
|
||||
}
|
||||
if (hasPostsuffix())
|
||||
sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getPostsuffix()));
|
||||
sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getPostsuffix()));
|
||||
return sb;
|
||||
}
|
||||
|
||||
|
@ -929,18 +929,18 @@ public class LegalTshegBar
|
|||
+ "transliterationType=\"THDL Extended Wylie 0.5\" "
|
||||
+ (hasPrefix()
|
||||
? ("prefix=\""
|
||||
+ UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getPrefix()) + "\" ")
|
||||
+ UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getPrefix()) + "\" ")
|
||||
: "")
|
||||
+ (hasHeadLetter()
|
||||
? ("headLetter=\""
|
||||
+ UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getHeadLetter())
|
||||
+ UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getHeadLetter())
|
||||
+ "\" ")
|
||||
: "")
|
||||
+ ("rootLetter=\""
|
||||
+ UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getRootLetter()) + "\" ")
|
||||
+ UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getRootLetter()) + "\" ")
|
||||
+ (hasSubjoinedLetter()
|
||||
? ("subjoinedLetter=\""
|
||||
+ UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getSubjoinedLetter())
|
||||
+ UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getSubjoinedLetter())
|
||||
+ "\" ")
|
||||
: "")
|
||||
+ (hasWaZurSubjoinedToRootLetter()
|
||||
|
@ -953,17 +953,17 @@ public class LegalTshegBar
|
|||
// DLC NOW: what about the root letter a, i.e. \u0F68 ? do we want the EWTS to be 'aa' ?
|
||||
+ ("vowel=\""
|
||||
+ (hasExplicitVowel()
|
||||
? UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getVowel())
|
||||
? UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getVowel())
|
||||
: "a")
|
||||
+ "\" ")
|
||||
+ (hasSuffix()
|
||||
? ("suffix=\""
|
||||
+ UnicodeCharToThdlWylie.getThdlWylieForUnicodeString(getSuffix())
|
||||
+ UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeString(getSuffix())
|
||||
+ "\" ")
|
||||
: "")
|
||||
+ (hasPostsuffix()
|
||||
? ("postsuffix=\""
|
||||
+ UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getPostsuffix())
|
||||
+ UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(getPostsuffix())
|
||||
+ "\" ")
|
||||
: "")
|
||||
+ "/>");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue