DKY'O etc. weren't being handled properly by ACIP->Tibetan. Now they are.
This commit is contained in:
parent
e799438f86
commit
557ed7ed44
5 changed files with 75 additions and 29 deletions
3
source/org/thdl/tib/text/ttt/.cvsignore
Normal file
3
source/org/thdl/tib/text/ttt/.cvsignore
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
*~
|
||||||
|
*.bak
|
||||||
|
.#*
|
|
@ -557,4 +557,40 @@ public class ACIPRules {
|
||||||
duff.add(TibetanMachineWeb.getGlyph("H"));
|
duff.add(TibetanMachineWeb.getGlyph("H"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns true if and only if l is the ACIP representation of a
|
||||||
|
letter that can be a suffix. Note that all postsuffixes are
|
||||||
|
also suffixes. l must not have an "A" -- use "S", not "SA",
|
||||||
|
that is. */
|
||||||
|
public static boolean isACIPSuffix(String l) {
|
||||||
|
return ("S".equals(l)
|
||||||
|
|| "G".equals(l)
|
||||||
|
|| "D".equals(l)
|
||||||
|
|| "M".equals(l)
|
||||||
|
|| "'".equals(l)
|
||||||
|
|| "B".equals(l)
|
||||||
|
|| "NG".equals(l)
|
||||||
|
|| "N".equals(l)
|
||||||
|
|| "L".equals(l)
|
||||||
|
|| "R".equals(l));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns true if and only if l is the ACIP representation of a
|
||||||
|
letter that can be a prefix. l must not have an "A" -- use
|
||||||
|
"D", not "DA", that is. */
|
||||||
|
public static boolean isACIPPrefix(String l) {
|
||||||
|
return ("'".equals(l)
|
||||||
|
|| "M".equals(l)
|
||||||
|
|| "B".equals(l)
|
||||||
|
|| "D".equals(l)
|
||||||
|
|| "G".equals(l));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns true if and only if l is the ACIP representation of a
|
||||||
|
letter that can be a postsuffix. l must not have an "A" --
|
||||||
|
use "D", not "DA", that is. */
|
||||||
|
public static boolean isACIPPostsuffix(String l) {
|
||||||
|
return ("S".equals(l)
|
||||||
|
|| "D".equals(l));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -408,10 +408,8 @@ tstHelper("KA'", "[(K . A), (' . )]",
|
||||||
tstHelper("SNYAMS'AM'ANG", "{S}{NYA}{M}{S'A}{M'A}{NG}", null, null, "{S+NYA}{M+S'A}{M'A}{NG}", 0);
|
tstHelper("SNYAMS'AM'ANG", "{S}{NYA}{M}{S'A}{M'A}{NG}", null, null, "{S+NYA}{M+S'A}{M'A}{NG}", 0);
|
||||||
tstHelper("SNYAM'AM", null, null, null, "{S+NYA}{M}{'A}{M}", 2);
|
tstHelper("SNYAM'AM", null, null, null, "{S+NYA}{M}{'A}{M}", 2);
|
||||||
tstHelper("SNYAMS'AM", null, null, null, "{S+NYA}{M}{S}{'A}{M}", 2);
|
tstHelper("SNYAMS'AM", null, null, null, "{S+NYA}{M}{S}{'A}{M}", 2);
|
||||||
tstHelper("SNYAM-'A-M", null, null, null, "!null!", 1);
|
|
||||||
tstHelper("SNYAM-'A-M", null, null, null, "{S+NYA}{M}{'A}{M}", -1);
|
tstHelper("SNYAM-'A-M", null, null, null, "{S+NYA}{M}{'A}{M}", -1);
|
||||||
tstHelper("SNY-M-'-M", null, null, null, "{S+NY}{M}{'}{M}", -1);
|
tstHelper("SNY-M-'-M", null, null, null, "{S+NY}{M}{'}{M}", -1);
|
||||||
tstHelper("SNY-M-'-M", null, null, null, "!null!", 1);
|
|
||||||
tstHelper("SNYAMS'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 2);
|
tstHelper("SNYAMS'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 2);
|
||||||
tstHelper("SNYAMS'I'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'I}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 2);
|
tstHelper("SNYAMS'I'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'I}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 2);
|
||||||
tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", -1);
|
tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", -1);
|
||||||
|
@ -421,7 +419,6 @@ tstHelper("KA'", "[(K . A), (' . )]",
|
||||||
tstHelper("SAM'US", null, null, null, "{SA}{M}{'U}{S}", 2);
|
tstHelper("SAM'US", null, null, null, "{SA}{M}{'U}{S}", 2);
|
||||||
tstHelper("SAM'UR'US", null, null, null, "{SA}{M}{'U}{R}{'U}{S}", 2);
|
tstHelper("SAM'UR'US", null, null, null, "{SA}{M}{'U}{R}{'U}{S}", 2);
|
||||||
tstHelper("LA'OS", null, null, null, "{LA}{'O}{S}", -1);
|
tstHelper("LA'OS", null, null, null, "{LA}{'O}{S}", -1);
|
||||||
tstHelper("LA'OS", null, null, null, "!null!", 1);
|
|
||||||
tstHelper("NA'OS", null, null, null, "{NA}{'O}{S}", -1);
|
tstHelper("NA'OS", null, null, null, "{NA}{'O}{S}", -1);
|
||||||
tstHelper("NA'IS", null, null, null, "{NA}{'I}{S}", -1);
|
tstHelper("NA'IS", null, null, null, "{NA}{'I}{S}", -1);
|
||||||
tstHelper("LE'UNG", null, null, null, "{LE}{'U}{NG}", -1);
|
tstHelper("LE'UNG", null, null, null, "{LE}{'U}{NG}", -1);
|
||||||
|
@ -456,12 +453,17 @@ tstHelper("KA'", "[(K . A), (' . )]",
|
||||||
}
|
}
|
||||||
|
|
||||||
tstHelper("GDAM-S'O", null, null, null, "{G}{DA}{M}{S}{'O}", 2);
|
tstHelper("GDAM-S'O", null, null, null, "{G}{DA}{M}{S}{'O}", 2);
|
||||||
tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C'O}", 0);
|
tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C'O}", -1);
|
||||||
tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C}{'O}", 3);
|
|
||||||
tstHelper("BRLA", null, null, null, "{B}{R+LA}", -1);
|
tstHelper("BRLA", null, null, null, "{B}{R+LA}", -1);
|
||||||
tstHelper("DKY", null, null, null, "{D}{K+Y}", -1);
|
tstHelper("DKY", null, null, null, "{D}{K+Y}", -1);
|
||||||
// DLC FIXME NOW this should work, but doesn't: tstHelper("DKY'O", null, null, null, "{D}{K+Y'O}", 3);
|
tstHelper("DKY'O", null, null, null, "{D}{K+Y'O}", -1);
|
||||||
tstHelper("DKYA'O", null, null, null, "{D}{K+YA}{'O}", -1);
|
tstHelper("DKYA'O", null, null, null, "{D}{K+YA}{'O}", -1);
|
||||||
|
tstHelper("GM'O", null, null, null, "{G+M'O}", -1);
|
||||||
|
tstHelper("GAM'O", null, null, null, "{GA}{M}{'O}", 2);
|
||||||
|
tstHelper("G-M'O", null, null, null, "{G}{M'O}", -1);
|
||||||
|
tstHelper("DM'O", null, null, null, "{D}{M'O}", -1);
|
||||||
|
tstHelper("DAM'O", null, null, null, "{DA}{M}{'O}", 2);
|
||||||
|
tstHelper("D-M'O", null, null, null, "{D}{M'O}", -1);
|
||||||
|
|
||||||
tstHelper("SHR'I", "{SH}{R'I}",
|
tstHelper("SHR'I", "{SH}{R'I}",
|
||||||
null,
|
null,
|
||||||
|
|
|
@ -119,11 +119,7 @@ class TPair {
|
||||||
&& ((null == r || "".equals(r))
|
&& ((null == r || "".equals(r))
|
||||||
|| "-".equals(r)
|
|| "-".equals(r)
|
||||||
|| "A".equals(r)) // DLC FIXME: though check for BASKYABS and warn because BSKYABS is more common
|
|| "A".equals(r)) // DLC FIXME: though check for BASKYABS and warn because BSKYABS is more common
|
||||||
&& ("'".equals(l)
|
&& ACIPRules.isACIPPrefix(l));
|
||||||
|| "M".equals(l)
|
|
||||||
|| "B".equals(l)
|
|
||||||
|| "D".equals(l)
|
|
||||||
|| "G".equals(l)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if and only if this pair could be a Tibetan
|
/** Returns true if and only if this pair could be a Tibetan
|
||||||
|
@ -133,8 +129,7 @@ class TPair {
|
||||||
&& ((null == r || "".equals(r))
|
&& ((null == r || "".equals(r))
|
||||||
|| "-".equals(r)
|
|| "-".equals(r)
|
||||||
|| "A".equals(r)) // DLC FIXME: though warn about GAMASA vs. GAMS
|
|| "A".equals(r)) // DLC FIXME: though warn about GAMASA vs. GAMS
|
||||||
&& ("S".equals(l)
|
&& ACIPRules.isACIPPostsuffix(l));
|
||||||
|| "D".equals(l)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if and only if this pair could be a Tibetan
|
/** Returns true if and only if this pair could be a Tibetan
|
||||||
|
@ -144,16 +139,7 @@ class TPair {
|
||||||
&& ((null == r || "".equals(r))
|
&& ((null == r || "".equals(r))
|
||||||
|| "-".equals(r)
|
|| "-".equals(r)
|
||||||
|| "A".equals(r))
|
|| "A".equals(r))
|
||||||
&& ("S".equals(l)
|
&& ACIPRules.isACIPSuffix(l));
|
||||||
|| "G".equals(l)
|
|
||||||
|| "D".equals(l)
|
|
||||||
|| "M".equals(l)
|
|
||||||
|| "'".equals(l)
|
|
||||||
|| "B".equals(l)
|
|
||||||
|| "NG".equals(l)
|
|
||||||
|| "N".equals(l)
|
|
||||||
|| "L".equals(l)
|
|
||||||
|| "R".equals(l)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if and only if this pair is merely a
|
/** Returns true if and only if this pair is merely a
|
||||||
|
|
|
@ -60,8 +60,8 @@ class TPairListFactory {
|
||||||
* but you don't) */
|
* but you don't) */
|
||||||
static TPairList[] breakACIPIntoChunks(String acip) throws IllegalArgumentException {
|
static TPairList[] breakACIPIntoChunks(String acip) throws IllegalArgumentException {
|
||||||
try {
|
try {
|
||||||
TPairList a = breakHelper(acip, true);
|
TPairList a = breakHelper(acip, true, false);
|
||||||
TPairList b = breakHelper(acip, false);
|
TPairList b = breakHelper(acip, false, false);
|
||||||
if (a.equals(b))
|
if (a.equals(b))
|
||||||
return new TPairList[] { a, null };
|
return new TPairList[] { a, null };
|
||||||
else
|
else
|
||||||
|
@ -72,8 +72,15 @@ class TPairListFactory {
|
||||||
throw new IllegalArgumentException("Input too large[2]: " + acip);
|
throw new IllegalArgumentException("Input too large[2]: " + acip);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/** Helps {@link breakACIPIntoChunks(String)}. */
|
/** Helps {@link #breakACIPIntoChunks(String)}.
|
||||||
private static TPairList breakHelper(String acip, boolean tickIsVowel) {
|
* @param tickIsVowel true if and only if you want to treat the
|
||||||
|
* ACIP {'} as a U+0F71 vowel instead of the full-sized
|
||||||
|
* consonant in special, "this might be an appendage like 'AM or
|
||||||
|
* 'ANG" circumstances
|
||||||
|
* @param weHaveSeenVowelAlready true if and only if, in our
|
||||||
|
* recursion, we've already found one vowel (not a disambiguator,
|
||||||
|
* but a vowel like "A", "E", "Um:", "'U", etc.) */
|
||||||
|
private static TPairList breakHelper(String acip, boolean tickIsVowel, boolean weHaveSeenVowelAlready) {
|
||||||
|
|
||||||
// base case for our recursion:
|
// base case for our recursion:
|
||||||
if ("".equals(acip))
|
if ("".equals(acip))
|
||||||
|
@ -86,6 +93,8 @@ class TPairListFactory {
|
||||||
if (!tickIsVowel
|
if (!tickIsVowel
|
||||||
&& null != head.getLeft()
|
&& null != head.getLeft()
|
||||||
&& null != head.getRight()
|
&& null != head.getRight()
|
||||||
|
&& weHaveSeenVowelAlready
|
||||||
|
&& ACIPRules.isACIPSuffix(head.getLeft()) // DKY'O should be two horizontal units, not three. -- {D}{KY'O}, not {D}{KY}{'O}.
|
||||||
&& head.getRight().startsWith("'")) {
|
&& head.getRight().startsWith("'")) {
|
||||||
head = new TPair(head.getLeft(),
|
head = new TPair(head.getLeft(),
|
||||||
// Without this disambiguator, we are
|
// Without this disambiguator, we are
|
||||||
|
@ -97,7 +106,12 @@ class TPairListFactory {
|
||||||
|
|
||||||
TPairList tail;
|
TPairList tail;
|
||||||
if ((tail
|
if ((tail
|
||||||
= breakHelper(acipBuf.substring(howMuch), tickIsVowel)).hasSimpleError()) {
|
= breakHelper(acipBuf.substring(howMuch),
|
||||||
|
tickIsVowel,
|
||||||
|
weHaveSeenVowelAlready
|
||||||
|
|| (head.getRight() != null
|
||||||
|
&& !"+".equals(head.getRight())
|
||||||
|
&& !"-".equals(head.getRight())))).hasSimpleError()) {
|
||||||
for (int i = 1; i < howMuch; i++) {
|
for (int i = 1; i < howMuch; i++) {
|
||||||
// try giving i characters back if that leaves us with
|
// try giving i characters back if that leaves us with
|
||||||
// a legal head and makes the rest free of simple
|
// a legal head and makes the rest free of simple
|
||||||
|
@ -106,7 +120,12 @@ class TPairListFactory {
|
||||||
TPair newHead;
|
TPair newHead;
|
||||||
if ((newHead = head.minusNRightmostACIPCharacters(i)).isLegal()
|
if ((newHead = head.minusNRightmostACIPCharacters(i)).isLegal()
|
||||||
&& !(newTail
|
&& !(newTail
|
||||||
= breakHelper(acipBuf.substring(howMuch - i), tickIsVowel)).hasSimpleError()) {
|
= breakHelper(acipBuf.substring(howMuch - i),
|
||||||
|
tickIsVowel,
|
||||||
|
weHaveSeenVowelAlready
|
||||||
|
|| (newHead.getRight() != null
|
||||||
|
&& !"+".equals(newHead.getRight())
|
||||||
|
&& !"-".equals(newHead.getRight())))).hasSimpleError()) {
|
||||||
newTail.prepend(newHead);
|
newTail.prepend(newHead);
|
||||||
return newTail;
|
return newTail;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue