Fixed ACIP->Unicode spaces/tshegs and newlines, especially with shads.
"NGA," becomes "NGA-tsheg-," automatically now.
This commit is contained in:
parent
5c240ac072
commit
717c3b94f3
8 changed files with 151 additions and 107 deletions
|
@ -797,7 +797,8 @@ public class DuffPane extends TibetanPane implements FocusListener {
|
||||||
before_vowel.add(dc_1);
|
before_vowel.add(dc_1);
|
||||||
|
|
||||||
before_vowel.add(dc_2);
|
before_vowel.add(dc_2);
|
||||||
java.util.List after_vowel = TibTextUtils.getVowel(dc_1, dc_2, v);
|
java.util.List after_vowel = new ArrayList();
|
||||||
|
TibTextUtils.getVowel(after_vowel, dc_1, dc_2, v);
|
||||||
if (after_vowel.size() >= before_vowel.size()) {
|
if (after_vowel.size() >= before_vowel.size()) {
|
||||||
setNumberOfGlyphsForLastVowel(after_vowel.size()
|
setNumberOfGlyphsForLastVowel(after_vowel.size()
|
||||||
- before_vowel.size());
|
- before_vowel.size());
|
||||||
|
@ -842,7 +843,8 @@ public class DuffPane extends TibetanPane implements FocusListener {
|
||||||
private void printAChenWithVowel(String v) {
|
private void printAChenWithVowel(String v) {
|
||||||
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN);
|
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN);
|
||||||
DuffCode dc = dc_array[TibetanMachineWeb.TMW];
|
DuffCode dc = dc_array[TibetanMachineWeb.TMW];
|
||||||
java.util.List achenlist = TibTextUtils.getVowel(dc,v);
|
java.util.List achenlist = new ArrayList();
|
||||||
|
TibTextUtils.getVowel(achenlist, dc, v);
|
||||||
DuffData[] dd = TibTextUtils.convertGlyphs(achenlist);
|
DuffData[] dd = TibTextUtils.convertGlyphs(achenlist);
|
||||||
getTibDoc().insertDuff(caret.getDot(), dd);
|
getTibDoc().insertDuff(caret.getDot(), dd);
|
||||||
}
|
}
|
||||||
|
|
|
@ -375,7 +375,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
if (!chars.isEmpty()) {
|
if (!chars.isEmpty()) {
|
||||||
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
||||||
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
|
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
|
||||||
glyphs.addAll(getVowel(dc, next));
|
getVowel(glyphs, dc, next);
|
||||||
chars.clear();
|
chars.clear();
|
||||||
}
|
}
|
||||||
else { //if previous is punctuation or null, then achen plus vowel - otherwise, previous could be vowel
|
else { //if previous is punctuation or null, then achen plus vowel - otherwise, previous could be vowel
|
||||||
|
@ -387,13 +387,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc, weDoNotCareIfThereIsCorrespondingWylieOrNot))) {
|
if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc, weDoNotCareIfThereIsCorrespondingWylieOrNot))) {
|
||||||
DuffCode dc_2 = (DuffCode)glyphs.removeLast();
|
DuffCode dc_2 = (DuffCode)glyphs.removeLast();
|
||||||
DuffCode dc_1 = (DuffCode)glyphs.removeLast();
|
DuffCode dc_1 = (DuffCode)glyphs.removeLast();
|
||||||
glyphs.addAll(getVowel(dc_1, dc_2, next));
|
getVowel(glyphs, dc_1, dc_2, next);
|
||||||
break vowel_block;
|
break vowel_block;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(ACHEN);
|
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(ACHEN);
|
||||||
dc = dc_array[TibetanMachineWeb.TMW];
|
dc = dc_array[TibetanMachineWeb.TMW];
|
||||||
glyphs.addAll(getVowel(dc, next));
|
getVowel(glyphs, dc, next);
|
||||||
}
|
}
|
||||||
|
|
||||||
chars.clear();
|
chars.clear();
|
||||||
|
@ -442,6 +442,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
default:
|
default:
|
||||||
String top_char = (String)chars.get(chars.size()-1);
|
String top_char = (String)chars.get(chars.size()-1);
|
||||||
chars.remove(chars.size()-1);
|
chars.remove(chars.size()-1);
|
||||||
|
// DLC PERFORMANCE FIXME: make glyphs a parameter
|
||||||
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
||||||
chars.clear();
|
chars.clear();
|
||||||
chars.add(top_char);
|
chars.add(top_char);
|
||||||
|
@ -520,39 +521,39 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
* @param vowel the vowel you want to affix, in Wylie
|
* @param vowel the vowel you want to affix, in Wylie
|
||||||
* @return a List of glyphs equal to the vowel in context
|
* (nothing is returned; the glyphs equal to the vowel in context are appended to l)
|
||||||
*/
|
*/
|
||||||
public static List getVowel(DuffCode context, String vowel) {
|
public static void getVowel(List l, DuffCode context, String vowel) {
|
||||||
return getVowel(null, context, vowel);
|
getVowel(l, null, context, vowel);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the vowel sequence for a given vowel in a given context.
|
* Gets the vowel sequence for a given vowel in a given context and
|
||||||
* Given a context, this method affixes a vowel and returns the context plus the vowel.
|
* appends it to l. Given a context, this method affixes a vowel and
|
||||||
* Since the choice of vowel glyph depends on the consonant to which it is attached,
|
* appends the context plus the vowel to l. Since the choice of vowel
|
||||||
* generally it is enough to provide just the immediately preceding context. However,
|
* glyph depends on the consonant to which it is attached, generally it
|
||||||
* in some cases, double vowels are allowed - for example 'buo'. To find the correct
|
* is enough to provide just the immediately preceding
|
||||||
* glyph for 'o', we need 'b' in this case, not 'u'. Note also that some Extended
|
* context. However, in some cases, double vowels are allowed - for
|
||||||
* Wylie vowels correspond to multiple glyphs in TibetanMachineWeb. For example,
|
* example 'buo'. To find the correct glyph for 'o', we need 'b' in
|
||||||
* the vowel I consists of both an achung and a reverse gigu. All required glyphs
|
* this case, not 'u'. Note also that some Extended Wylie vowels
|
||||||
* are part of the returned List.
|
* correspond to multiple glyphs in TibetanMachineWeb. For example, the
|
||||||
* @param context_1 the glyph occurring two glyphs before the vowel you want to affix
|
* vowel I consists of both an achung and a reverse gigu. All required
|
||||||
* @param context_2 the glyph immediately before the vowel you want to affix
|
* glyphs are appended to l.
|
||||||
* @param vowel the vowel you want to affix, in Wylie
|
* @param context_1 the glyph occurring two glyphs before the vowel you
|
||||||
* @return a List of glyphs equal to the vowel in context
|
* want to affix
|
||||||
*/
|
* @param context_2 the glyph immediately before the vowel you want to
|
||||||
|
* affix
|
||||||
public static List getVowel(DuffCode context_1, DuffCode context_2, String vowel) {
|
* @param vowel the vowel you want to affix, in Wylie */
|
||||||
List vowels = new ArrayList();
|
|
||||||
|
|
||||||
|
public static void getVowel(List l, DuffCode context_1, DuffCode context_2, String vowel) {
|
||||||
//this vowel doesn't correspond to a glyph -
|
//this vowel doesn't correspond to a glyph -
|
||||||
//so you just return the original context
|
//so you just append the original context to l
|
||||||
|
|
||||||
if ( vowel.equals(WYLIE_aVOWEL) ||
|
if ( vowel.equals(WYLIE_aVOWEL) ||
|
||||||
TibetanMachineWeb.isTopVowel(context_2)) {
|
TibetanMachineWeb.isTopVowel(context_2)) {
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//first, the three easiest cases: ai, au, and <i
|
//first, the three easiest cases: ai, au, and <i
|
||||||
|
@ -561,36 +562,36 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|
|
||||||
if (vowel.equals(ai_VOWEL)) {
|
if (vowel.equals(ai_VOWEL)) {
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL);
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL);
|
||||||
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(au_VOWEL)) {
|
if (vowel.equals(au_VOWEL)) {
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL);
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL);
|
||||||
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(reverse_i_VOWEL)) {
|
if (vowel.equals(reverse_i_VOWEL)) {
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
|
|
||||||
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
||||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||||
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//second, the vowels i, e, and o
|
//second, the vowels i, e, and o
|
||||||
|
@ -609,14 +610,14 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
vowels.add(dc_v);
|
l.add(dc_v);
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(e_VOWEL)) {
|
if (vowel.equals(e_VOWEL)) {
|
||||||
|
@ -628,14 +629,14 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
vowels.add(dc_v);
|
l.add(dc_v);
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(o_VOWEL)) {
|
if (vowel.equals(o_VOWEL)) {
|
||||||
|
@ -647,14 +648,14 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
vowels.add(dc_v);
|
l.add(dc_v);
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//next come the vowels u, A, and U
|
//next come the vowels u, A, and U
|
||||||
|
@ -669,17 +670,17 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
|
||||||
|
|
||||||
if (null != context_1)
|
if (null != context_1)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
if (null == halfHeight)
|
if (null == halfHeight)
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
else
|
else
|
||||||
vowels.add(halfHeight);
|
l.add(halfHeight);
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
vowels.add(dc_v);
|
l.add(dc_v);
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(A_VOWEL)) {
|
if (vowel.equals(A_VOWEL)) {
|
||||||
|
@ -688,18 +689,18 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
||||||
|
|
||||||
if (null != context_1)
|
if (null != context_1)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
if (null == halfHeight)
|
if (null == halfHeight)
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
else
|
else
|
||||||
vowels.add(halfHeight);
|
l.add(halfHeight);
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
|
|
||||||
vowels.add(dc_v);
|
l.add(dc_v);
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(U_VOWEL)) {
|
if (vowel.equals(U_VOWEL)) {
|
||||||
|
@ -708,17 +709,17 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
|
||||||
|
|
||||||
if (null != context_1)
|
if (null != context_1)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
if (null == halfHeight)
|
if (null == halfHeight)
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
else
|
else
|
||||||
vowels.add(halfHeight);
|
l.add(halfHeight);
|
||||||
|
|
||||||
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
|
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
|
||||||
vowels.add(dc_v);
|
l.add(dc_v);
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//finally, the vowels I and <I
|
//finally, the vowels I and <I
|
||||||
|
@ -733,19 +734,19 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
||||||
|
|
||||||
if (null != context_1)
|
if (null != context_1)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
if (null == halfHeight)
|
if (null == halfHeight)
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
else
|
else
|
||||||
vowels.add(halfHeight);
|
l.add(halfHeight);
|
||||||
|
|
||||||
if (null != dc_v_sub && null != dc_v_sup) {
|
if (null != dc_v_sub && null != dc_v_sup) {
|
||||||
vowels.add(dc_v_sub);
|
l.add(dc_v_sub);
|
||||||
vowels.add(dc_v_sup);
|
l.add(dc_v_sup);
|
||||||
}
|
}
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.equals(reverse_I_VOWEL)) {
|
if (vowel.equals(reverse_I_VOWEL)) {
|
||||||
|
@ -756,22 +757,22 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
|
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
|
||||||
|
|
||||||
if (null != context_1)
|
if (null != context_1)
|
||||||
vowels.add(context_1);
|
l.add(context_1);
|
||||||
|
|
||||||
if (null == halfHeight)
|
if (null == halfHeight)
|
||||||
vowels.add(context_2);
|
l.add(context_2);
|
||||||
else
|
else
|
||||||
vowels.add(halfHeight);
|
l.add(halfHeight);
|
||||||
|
|
||||||
if (null != dc_v_sub && null != dc_v_sup) {
|
if (null != dc_v_sub && null != dc_v_sup) {
|
||||||
vowels.add(dc_v_sub);
|
l.add(dc_v_sub);
|
||||||
vowels.add(dc_v_sup);
|
l.add(dc_v_sup);
|
||||||
}
|
}
|
||||||
|
|
||||||
return vowels;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
throw new Error("DLC can this happen? " + vowel);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1161,8 +1161,9 @@ public static String getWylieForVowel(String s) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the DuffCode required for a vowel, if
|
* Gets the DuffCode required for a vowel, if affixed to the given
|
||||||
* affixed to the given hashKey.
|
* hashKey. Not as pretty as {@link
|
||||||
|
* TibTextUtils#getVowel(java.util.List,DuffCode,DuffCode,String)}.
|
||||||
* @param hashKey the key for the character the vowel is to be affixed
|
* @param hashKey the key for the character the vowel is to be affixed
|
||||||
* to; see {@link #getGlyph(String)} to learn about hash keys.
|
* to; see {@link #getGlyph(String)} to learn about hash keys.
|
||||||
* @param vowel the vowel you want the DuffCode for
|
* @param vowel the vowel you want the DuffCode for
|
||||||
|
@ -1170,7 +1171,7 @@ public static String getWylieForVowel(String s) {
|
||||||
* context, or null if there is no such vowel in
|
* context, or null if there is no such vowel in
|
||||||
* the context
|
* the context
|
||||||
* @see DuffCode
|
* @see DuffCode
|
||||||
*/
|
* @see TibTextUtils#getVowel(java.util.List,DuffCode,DuffCode,String) */
|
||||||
public static DuffCode getVowel(String hashKey, int vowel) {
|
public static DuffCode getVowel(String hashKey, int vowel) {
|
||||||
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
|
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
|
||||||
|
|
||||||
|
|
|
@ -132,7 +132,10 @@ public class ACIPConverter {
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
TibetanDocument tdoc = new TibetanDocument();
|
TibetanDocument tdoc = new TibetanDocument();
|
||||||
tdoc.setRomanAttributeSet("Courier", 20); // DLC make me configurable.
|
tdoc.setRomanAttributeSet(ThdlOptions.getStringOption("thdl.acip.to.x.latin.font",
|
||||||
|
"Courier New"),
|
||||||
|
ThdlOptions.getIntegerOption("thdl.acip.to.x.latin.font.size",
|
||||||
|
20));
|
||||||
boolean rv
|
boolean rv
|
||||||
= convertToTMW(scan, tdoc, errors, warnings,
|
= convertToTMW(scan, tdoc, errors, warnings,
|
||||||
writeWarningsToResult, warningLevel);
|
writeWarningsToResult, warningLevel);
|
||||||
|
@ -357,7 +360,7 @@ public class ACIPConverter {
|
||||||
} else if (stype == ACIPString.END_SLASH) {
|
} else if (stype == ACIPString.END_SLASH) {
|
||||||
if (null != writer) unicode = "\u0F3D";
|
if (null != writer) unicode = "\u0F3D";
|
||||||
if (null != tdoc) duff = new DuffCode[] { TibetanMachineWeb.getGlyph(")") };
|
if (null != tdoc) duff = new DuffCode[] { TibetanMachineWeb.getGlyph(")") };
|
||||||
} else {
|
} else if (stype == ACIPString.TIBETAN_PUNCTUATION) {
|
||||||
// For ACIP, tshegs are used as both
|
// For ACIP, tshegs are used as both
|
||||||
// tshegs and whitespace. We treat a
|
// tshegs and whitespace. We treat a
|
||||||
// space as a tsheg if and only if it
|
// space as a tsheg if and only if it
|
||||||
|
@ -368,8 +371,8 @@ public class ACIPConverter {
|
||||||
// typesetting.
|
// typesetting.
|
||||||
boolean done = false;
|
boolean done = false;
|
||||||
// DLC what about after numbers? marks?
|
// DLC what about after numbers? marks?
|
||||||
if (s.getText().equals(" ")) {
|
|
||||||
TPairList lpl = null;
|
TPairList lpl = null;
|
||||||
|
if (s.getText().equals(" ")) {
|
||||||
if (!lastGuyWasNonPunct
|
if (!lastGuyWasNonPunct
|
||||||
|| (null != lastGuy
|
|| (null != lastGuy
|
||||||
&& (lpl = lastGuy.get(lastGuy.size() - 1)).size() == 1
|
&& (lpl = lastGuy.get(lastGuy.size() - 1)).size() == 1
|
||||||
|
@ -389,7 +392,16 @@ public class ACIPConverter {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (s.getText().equals(",")
|
||||||
|
&& lastGuyWasNonPunct
|
||||||
|
&& null != lastGuy
|
||||||
|
&& (lpl = lastGuy.get(lastGuy.size() - 1)).size() == 1
|
||||||
|
&& lpl.get(0).getLeft().equals("NG")) {
|
||||||
|
DuffCode tshegDuff = TibetanMachineWeb.getGlyph(" ");
|
||||||
|
if (null == tshegDuff) throw new Error("tsheg duff");
|
||||||
|
tdoc.appendDuffCodes(new DuffCode[] { tshegDuff });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!done) {
|
if (!done) {
|
||||||
if (null != writer) unicode = ACIPRules.getUnicodeFor(s.getText(), false);
|
if (null != writer) unicode = ACIPRules.getUnicodeFor(s.getText(), false);
|
||||||
if (null != tdoc) {
|
if (null != tdoc) {
|
||||||
|
@ -406,6 +418,8 @@ public class ACIPConverter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
throw new Error("forgot a case");
|
||||||
}
|
}
|
||||||
if (null != writer && null == unicode)
|
if (null != writer && null == unicode)
|
||||||
throw new Error("FIXME: make this an assertion 1");
|
throw new Error("FIXME: make this an assertion 1");
|
||||||
|
|
|
@ -21,9 +21,12 @@ package org.thdl.tib.text.ttt;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.thdl.tib.text.DuffCode;
|
import org.thdl.tib.text.DuffCode;
|
||||||
|
import org.thdl.tib.text.THDLWylieConstants;
|
||||||
import org.thdl.tib.text.TibetanMachineWeb;
|
import org.thdl.tib.text.TibetanMachineWeb;
|
||||||
|
import org.thdl.tib.text.TibTextUtils;
|
||||||
|
|
||||||
/** Canonizes some facts regarding the ACIP transcription system.
|
/** Canonizes some facts regarding the ACIP transcription system.
|
||||||
* @author David Chandler */
|
* @author David Chandler */
|
||||||
|
@ -460,38 +463,41 @@ class ACIPRules {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** DLC DOC: Gets the duffcodes for vowel, such that they look good with hashKey, and appends them to r. */
|
/** Gets the duffcodes for vowel, such that they look good with
|
||||||
static void getDuffForACIPVowel(ArrayList r, String hashKey, String vowel) {
|
* the glyph passed as preceding, and appends them to r. */
|
||||||
|
static void getDuffForACIPVowel(ArrayList r, DuffCode preceding, String vowel) {
|
||||||
if (null == vowel) return;
|
if (null == vowel) return;
|
||||||
if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion! Use assert.
|
if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion! Use assert.
|
||||||
throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly.");
|
throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly.");
|
||||||
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) // FIXME: expensive assertion! Use assert.
|
|
||||||
throw new IllegalArgumentException("bad hashKey");
|
|
||||||
|
|
||||||
// Order matters here.
|
// Order matters here.
|
||||||
if (vowel.indexOf("'U") >= 0)
|
if (vowel.startsWith("A")) {
|
||||||
r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_U));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.WYLIE_aVOWEL);
|
||||||
else {
|
} else if (vowel.indexOf("'U") >= 0) {
|
||||||
|
TibTextUtils.getVowel(r, preceding, "U");
|
||||||
|
} else {
|
||||||
if (vowel.indexOf('\'') >= 0)
|
if (vowel.indexOf('\'') >= 0)
|
||||||
r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_A));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.A_VOWEL);
|
||||||
if (vowel.indexOf("EE") >= 0)
|
if (vowel.indexOf("EE") >= 0)
|
||||||
r.add(TibetanMachineWeb.getGlyph("ai"));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.ai_VOWEL);
|
||||||
else if (vowel.indexOf('E') >= 0)
|
else if (vowel.indexOf('E') >= 0)
|
||||||
r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_e));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.e_VOWEL);
|
||||||
if (vowel.indexOf("OO") >= 0)
|
if (vowel.indexOf("OO") >= 0)
|
||||||
r.add(TibetanMachineWeb.getGlyph("au"));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.au_VOWEL);
|
||||||
else if (vowel.indexOf('O') >= 0)
|
else if (vowel.indexOf('O') >= 0)
|
||||||
r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_o));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.o_VOWEL);
|
||||||
if (vowel.indexOf('I') >= 0)
|
if (vowel.indexOf('I') >= 0)
|
||||||
r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_i));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.i_VOWEL);
|
||||||
if (vowel.indexOf('U') >= 0)
|
if (vowel.indexOf('U') >= 0)
|
||||||
r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_u));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.u_VOWEL);
|
||||||
if (vowel.indexOf('i') >= 0)
|
if (vowel.indexOf('i') >= 0)
|
||||||
r.add(TibetanMachineWeb.getGlyph("-i"));
|
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.reverse_i_VOWEL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vowel.indexOf('m') >= 0)
|
if (vowel.indexOf('m') >= 0)
|
||||||
r.add(TibetanMachineWeb.getGlyph("M"));
|
r.add(TibetanMachineWeb.getGlyph("M"));
|
||||||
if (vowel.indexOf(':') >= 0)
|
if (vowel.indexOf(':') >= 0)
|
||||||
r.add(TibetanMachineWeb.getGlyph("H"));
|
r.add(TibetanMachineWeb.getGlyph("H"));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -778,11 +778,22 @@ public class ACIPTshegBarScanner {
|
||||||
// careful, so "KA\r\n" and "GA\n" appear where "KA
|
// careful, so "KA\r\n" and "GA\n" appear where "KA
|
||||||
// \r\n" and "GA \n" should appear.
|
// \r\n" and "GA \n" should appear.
|
||||||
if (('\r' == ch
|
if (('\r' == ch
|
||||||
|| '\n' == ch)
|
|| ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r'))
|
||||||
&& !al.isEmpty()
|
&& !al.isEmpty()
|
||||||
&& ((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TIBETAN_NON_PUNCTUATION) {
|
&& ((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TIBETAN_NON_PUNCTUATION) {
|
||||||
al.add(new ACIPString(" ",
|
al.add(new ACIPString(" ", ACIPString.TIBETAN_PUNCTUATION));
|
||||||
ACIPString.TIBETAN_PUNCTUATION));
|
}
|
||||||
|
|
||||||
|
// "DANG,\nLHAG" is really "DANG, LHAG". But always? Not if you have "MDO,\n\nKA...".
|
||||||
|
if (('\r' == ch
|
||||||
|
|| ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r'))
|
||||||
|
&& !al.isEmpty()
|
||||||
|
&& ((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TIBETAN_PUNCTUATION
|
||||||
|
&& ((ACIPString)al.get(al.size() - 1)).getText().equals(",")
|
||||||
|
&& s.charAt(i-1) == ','
|
||||||
|
&& (i + (('\r' == ch) ? 2 : 1) < sl
|
||||||
|
&& (s.charAt(i+(('\r' == ch) ? 2 : 1)) != ch))) {
|
||||||
|
al.add(new ACIPString(" ", ACIPString.TIBETAN_PUNCTUATION));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't add in a "\r\n" or "\n" unless there's a
|
// Don't add in a "\r\n" or "\n" unless there's a
|
||||||
|
|
|
@ -19,6 +19,7 @@ Contributor(s): ______________________________________.
|
||||||
package org.thdl.tib.text.ttt;
|
package org.thdl.tib.text.ttt;
|
||||||
|
|
||||||
import org.thdl.tib.text.TibetanMachineWeb;
|
import org.thdl.tib.text.TibetanMachineWeb;
|
||||||
|
import org.thdl.tib.text.DuffCode;
|
||||||
import org.thdl.tib.text.TGCPair;
|
import org.thdl.tib.text.TGCPair;
|
||||||
import org.thdl.util.ThdlDebug;
|
import org.thdl.util.ThdlDebug;
|
||||||
|
|
||||||
|
@ -612,6 +613,7 @@ class TPairList {
|
||||||
/** Appends the DuffCodes that correspond to this grapheme cluster
|
/** Appends the DuffCodes that correspond to this grapheme cluster
|
||||||
* to duff. Assumes this is one grapheme cluster. */
|
* to duff. Assumes this is one grapheme cluster. */
|
||||||
void getDuff(ArrayList duff) {
|
void getDuff(ArrayList duff) {
|
||||||
|
int previousSize = duff.size();
|
||||||
StringBuffer wylieForConsonant = new StringBuffer();
|
StringBuffer wylieForConsonant = new StringBuffer();
|
||||||
for (int x = 0; x + 1 < size(); x++) {
|
for (int x = 0; x + 1 < size(); x++) {
|
||||||
wylieForConsonant.append(get(x).getWylie(false));
|
wylieForConsonant.append(get(x).getWylie(false));
|
||||||
|
@ -625,8 +627,15 @@ class TPairList {
|
||||||
throw new Error("How did this happen?");
|
throw new Error("How did this happen?");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (lastPair.getRight() == null || lastPair.equals("-")) {
|
||||||
duff.add(TibetanMachineWeb.getGlyph(hashKey));
|
duff.add(TibetanMachineWeb.getGlyph(hashKey));
|
||||||
ACIPRules.getDuffForACIPVowel(duff, hashKey, lastPair.getRight());
|
} else {
|
||||||
|
ACIPRules.getDuffForACIPVowel(duff,
|
||||||
|
TibetanMachineWeb.getGlyph(hashKey),
|
||||||
|
lastPair.getRight());
|
||||||
|
}
|
||||||
|
if (previousSize == duff.size())
|
||||||
|
throw new Error("TPairList with no duffs? " + toString()); // DLC FIXME: change to assertion.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// DLC FIXME: handle 'o' and 'x', e.g. KAo and NYAx.
|
// DLC FIXME: handle 'o' and 'x', e.g. KAo and NYAx.
|
||||||
|
|
|
@ -217,7 +217,7 @@ class TStackList {
|
||||||
}
|
}
|
||||||
return u.toString();
|
return u.toString();
|
||||||
}
|
}
|
||||||
/** DLC DOC */
|
/** Returns the DuffCodes corresponding to this stack list. */
|
||||||
DuffCode[] getDuff() {
|
DuffCode[] getDuff() {
|
||||||
ArrayList al = new ArrayList(size()*2); // rough estimate
|
ArrayList al = new ArrayList(size()*2); // rough estimate
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
Loading…
Reference in a new issue