Fixed ACIP->TMW conversion of vowels such as 'I.

Fixed ACIP->Unicode/TMW for BDE, which should be B-DE, not B+DE, because the former is legal Tibetan.

The ACIP->EWTS subroutine has been improved.

TMW->Wylie and TMW->ACIP conversions now handle error cases better.

TMW->ACIP conversion now embeds friendly error messages in its output.
This commit is contained in:
dchandler 2003-09-12 05:06:37 +00:00
parent 16817d0b8e
commit 115d0e0e6c
14 changed files with 689 additions and 472 deletions

View file

@ -345,18 +345,24 @@ public class DuffPane extends TibetanPane implements FocusListener {
* The keymap defines a default behavior for key presses * The keymap defines a default behavior for key presses
* in both Tibetan and Roman mode. * in both Tibetan and Roman mode.
*/ */
private void setupKeymap() { private void setupKeymap() {
Action defaultAction = new AbstractAction() { Action defaultAction = new AbstractAction() {
public void actionPerformed(ActionEvent e) { public void actionPerformed(ActionEvent e) {
DuffPane.this.performKeyStroke(e.getModifiers(), try {
e.getActionCommand()); DuffPane.this.performKeyStroke(e.getModifiers(),
} e.getActionCommand());
}; } catch (Throwable t) {
createActionTable(this); System.err.println("JSKAD ERROR: " + t);
Keymap keymap = addKeymap("DuffBindings", getKeymap()); t.printStackTrace(System.err);
keymap.setDefaultAction(defaultAction); System.exit(1);
setKeymap(keymap); }
} }
};
createActionTable(this);
Keymap keymap = addKeymap("DuffBindings", getKeymap());
keymap.setDefaultAction(defaultAction);
setKeymap(keymap);
}
private void createActionTable(JTextComponent textComponent) { private void createActionTable(JTextComponent textComponent) {
actions = new Hashtable(); actions = new Hashtable();
@ -746,78 +752,82 @@ public class DuffPane extends TibetanPane implements FocusListener {
* *
* @param v the vowel (in Wylie) you want to insert * @param v the vowel (in Wylie) you want to insert
*/ */
private void putVowel(String v) { private void putVowel(String v) {
if (caret.getDot()==0) { if (caret.getDot()==0) {
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v); printAChenWithVowel(v);
return; return;
} }
AttributeSet attr = getTibDoc().getCharacterElement(caret.getDot()-1).getAttributes(); AttributeSet attr = getTibDoc().getCharacterElement(caret.getDot()-1).getAttributes();
String fontName = StyleConstants.getFontFamily(attr); String fontName = StyleConstants.getFontFamily(attr);
int fontNum; int fontNum;
if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) { if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) {
try { try {
char c2 = getTibDoc().getText(caret.getDot()-1, 1).charAt(0); char c2 = getTibDoc().getText(caret.getDot()-1, 1).charAt(0);
int k = (int)c2; int k = (int)c2;
if (k<32 || k>126) { //if previous character is formatting or some other non-character if (k<32 || k>126) { //if previous character is formatting or some other non-character
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v); printAChenWithVowel(v);
return; return;
} }
String wylie String wylie
= TibetanMachineWeb.getWylieForGlyph(fontNum, = TibetanMachineWeb.getWylieForGlyph(fontNum,
k, k,
TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot); TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot);
if (TibetanMachineWeb.isWyliePunc(wylie)) { if (TibetanMachineWeb.isWyliePunc(wylie)) {
if (charList.isEmpty() && !TibetanMachineWeb.isAChenRequiredBeforeVowel()) { if (charList.isEmpty() && !TibetanMachineWeb.isAChenRequiredBeforeVowel()) {
printAChenWithVowel(v); printAChenWithVowel(v);
return; return;
} }
} }
DuffCode dc_1 = null; DuffCode dc_1 = null;
DuffCode dc_2 = new DuffCode(fontNum, c2); DuffCode dc_2 = new DuffCode(fontNum, c2);
if (caret.getDot() >= 2) { if (caret.getDot() >= 2) {
attr = getTibDoc().getCharacterElement(caret.getDot()-2).getAttributes(); attr = getTibDoc().getCharacterElement(caret.getDot()-2).getAttributes();
fontName = StyleConstants.getFontFamily(attr); fontName = StyleConstants.getFontFamily(attr);
if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) { if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) {
c2 = getTibDoc().getText(caret.getDot()-2, 1).charAt(0); c2 = getTibDoc().getText(caret.getDot()-2, 1).charAt(0);
dc_1 = new DuffCode(fontNum, c2); dc_1 = new DuffCode(fontNum, c2);
} }
} }
java.util.List before_vowel = new ArrayList(); java.util.List before_vowel = new ArrayList();
if (null != dc_1) if (null != dc_1)
before_vowel.add(dc_1); before_vowel.add(dc_1);
before_vowel.add(dc_2); before_vowel.add(dc_2);
java.util.List after_vowel = new ArrayList(); java.util.List after_vowel = new ArrayList();
TibTextUtils.getVowel(after_vowel, dc_1, dc_2, v); try {
TibTextUtils.getVowel(after_vowel, dc_1, dc_2, v);
} catch (IllegalArgumentException e) {
// drop this vowel silently.
}
if (after_vowel.size() >= before_vowel.size()) { if (after_vowel.size() >= before_vowel.size()) {
setNumberOfGlyphsForLastVowel(after_vowel.size() setNumberOfGlyphsForLastVowel(after_vowel.size()
- before_vowel.size()); - before_vowel.size());
} else { } else {
setNumberOfGlyphsForLastVowel(0); setNumberOfGlyphsForLastVowel(0);
ThdlDebug.noteIffyCode(); // I don't think this can ever happen, but... // can happen for pou (as opposed to puo) (FIXME)
} }
redrawGlyphs(before_vowel, after_vowel); redrawGlyphs(before_vowel, after_vowel);
} }
catch(BadLocationException ble) { catch(BadLocationException ble) {
System.out.println("no--can't insert here"); System.out.println("no--can't insert here");
ThdlDebug.noteIffyCode(); ThdlDebug.noteIffyCode();
} }
} }
else { //0 font means not Tibetan font, so begin new Tibetan font section else { //0 font means not Tibetan font, so begin new Tibetan font section
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v); printAChenWithVowel(v);
} }
} }
/** /**
@ -840,14 +850,18 @@ public class DuffPane extends TibetanPane implements FocusListener {
* *
* @param v the vowel (in Wylie) which you want to print with ACHEN * @param v the vowel (in Wylie) which you want to print with ACHEN
*/ */
private void printAChenWithVowel(String v) { private void printAChenWithVowel(String v) {
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN); DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN);
DuffCode dc = dc_array[TibetanMachineWeb.TMW]; DuffCode dc = dc_array[TibetanMachineWeb.TMW];
java.util.List achenlist = new ArrayList(); java.util.List achenlist = new ArrayList();
TibTextUtils.getVowel(achenlist, dc, v); try {
DuffData[] dd = TibTextUtils.convertGlyphs(achenlist); TibTextUtils.getVowel(achenlist, dc, v);
getTibDoc().insertDuff(caret.getDot(), dd); } catch (IllegalArgumentException e) {
} // drop this vowel silently.
}
DuffData[] dd = TibTextUtils.convertGlyphs(achenlist);
getTibDoc().insertDuff(caret.getDot(), dd);
}
/** /**
* Puts a bindu/anusvara at the current caret position. * Puts a bindu/anusvara at the current caret position.
@ -884,7 +898,8 @@ public class DuffPane extends TibetanPane implements FocusListener {
DuffCode dc = new DuffCode(fontNum, c2); DuffCode dc = new DuffCode(fontNum, c2);
java.util.List beforecaret = new ArrayList(); java.util.List beforecaret = new ArrayList();
beforecaret.add(dc); beforecaret.add(dc);
java.util.List bindulist = TibTextUtils.getBindu(dc); java.util.List bindulist = new LinkedList();
TibTextUtils.getBindu(bindulist, dc);
redrawGlyphs(beforecaret, bindulist); redrawGlyphs(beforecaret, bindulist);
initKeyboard(); initKeyboard();
return; return;
@ -895,7 +910,9 @@ public class DuffPane extends TibetanPane implements FocusListener {
} }
} }
DuffData[] dd = TibTextUtils.convertGlyphs(TibTextUtils.getBindu(null)); java.util.List binduList = new LinkedList();
TibTextUtils.getBindu(binduList, null);
DuffData[] dd = TibTextUtils.convertGlyphs(binduList);
getTibDoc().insertDuff(caret.getDot(), dd); getTibDoc().insertDuff(caret.getDot(), dd);
initKeyboard(); initKeyboard();
} }

View file

@ -37,13 +37,13 @@ rgyal ba kun dngos mtsho skyes rdo rje bstan pa'i rtsa lag thams cad mkhyen pa z
bka' drin gzugs can dbyig 'dzin lto 'dir shong 'gyur min na kun mkhyen srang las gang gis gzhal//\par bka' drin gzugs can dbyig 'dzin lto 'dir shong 'gyur min na kun mkhyen srang las gang gis gzhal//\par
\par \par
li khri'i lcug phran mkhyen pa'i snang ba can//\par li khri'i lcug phran mkhyen pa'i snang ba can//\par
'jam mgon bloX. yi lang tsho bazaX.nga po'i tshon//\par 'jam mgon bloX yi lang tsho bazaX.nga po'i tshon//\par
kha dog so sor bkra ba'i gragaX.sa paX.'i rgyan//\par kha dog so sor bkra ba'i gragaX.sa paX.'i rgyan//\par
phyogs bral rna lung 'god mkhas rtag tu rgyal//\f2\fs44\i0\b0\ul0\cf0\par phyogs bral rna lung 'god mkhas rtag tu rgyal//\f2\fs44\i0\b0\ul0\cf0\par
\par \par
\f1\fs28\i0\b0\ul0 dpal ldan chos kyi rang bzhin ngos yangs par//\par \f1\fs28\i0\b0\ul0 dpal ldan chos kyi rang bzhin ngos yangs par//\par
gnas lnga'i bang mdzod 'byor par lhun grub pa'i//\par gnas lnga'i bang mdzod 'byor par lhun grub pa'i//\par
mnga' sgyur bgrod byed ban+d+hu dziA wa ka/\par mnga' sgyur bgrod byed ban+d+hu dzI wa ka/\par
rmad byung cod pan mchog tu rnam par bsngags//\par rmad byung cod pan mchog tu rnam par bsngags//\par
\par \par
rgya hor lung dang rigs kyis mngon mtho zhing //\par rgya hor lung dang rigs kyis mngon mtho zhing //\par
@ -77,7 +77,7 @@ deb ther rdzogs ldan gsar pa'i dga' ston mgron//\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de yang ston pa 'di nyid kyi gdung la nyi ma'i gnyen dang bu ram shing pa shAkya zhes brjod pa ni/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 yul gru 'dzin du rgyal po brgya tham pa byung ba'i mtha' ma rna ba can gyi bu gau ta ma dang b+ha ra d+h+wa dza gnyis las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sngon mas rab tu byung ste lo ma'i spyil po yid du 'ong ba'i bsam gtan gyi khang bur gnas pa'i tshe/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 g.yo can pad+ma'i rtsa lag dang bzang mo dga' mgur spyod pa'i dus las yol bas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bzang mo bas.d pa'i ral gri khrag can lo ma'i spyil po'i nye 'dabs su bor ba'i rkyen gyis gau ta ma bsal shing la bskyon pa'i dus/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drang srong mdog nag gi rdzu 'phrul gyis gru char gyi thigs pa dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dri bzhon lus la reg pa'i rkyen gyis 'dod pa'i gnas rjes su dran pa'i yid kyi shing rtas drangs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 khams kyi dwangs ma gnyis lhan cig tu lhung ba las sgo nga gnyis su gyur pa nyi 'od kyis bskyangs shing khye'u gnyis bu ram gyi shing gseb tu zhugs pas ming de ltar du grags la/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bu ram shing pa'i brgyud la rgyal po brgya byung ba'i mtha' ma 'phags skyes po/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'phags skyes po la sras bzhi byung yang btsun mo 'das te slar stobs ldan gyi rgyal po gzhan zhig gi bu mo khab tu blangs par sras byung na rgyal srid du dbang bskur bar khas blangs pas dam bcas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sras sngon ma rnams spyugs pa rang rang gi sring mo dang bcas te chu bo skal ldan shing rta'i 'gram du ming sring lhan cig tu 'dus pa las shAkya'i brgyud ces grags pa yin no//\par \f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de yang ston pa 'di nyid kyi gdung la nyi ma'i gnyen dang bu ram shing pa shAkya zhes brjod pa ni/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 yul gru 'dzin du rgyal po 
brgya tham pa byung ba'i mtha' ma rna ba can gyi bu gau ta ma dang b+ha ra d+h+wa dza gnyis las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sngon mas rab tu byung ste lo ma'i spyil po yid du 'ong ba'i bsam gtan gyi khang bur gnas pa'i tshe/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 g.yo can pad+ma'i rtsa lag dang bzang mo dga' mgur spyod pa'i dus las yol bas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bzang mo bas.d pa'i ral gri khrag can lo ma'i spyil po'i nye 'dabs su bor ba'i rkyen gyis gau ta ma bsal shing la bskyon pa'i dus/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drang srong mdog nag gi rdzu 'phrul gyis gru char gyi thigs pa dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dri bzhon lus la reg pa'i rkyen gyis 'dod pa'i gnas rjes su dran pa'i yid kyi shing rtas drangs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 khams kyi dwangs ma gnyis lhan cig tu lhung ba las sgo nga gnyis su gyur pa nyi 'od kyis bskyangs shing khye'u gnyis bu ram gyi shing gseb tu zhugs pas ming de ltar du grags la/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bu ram shing pa'i brgyud la rgyal po brgya byung ba'i mtha' ma 'phags skyes po/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'phags skyes po la sras bzhi byung yang btsun mo 'das te slar stobs ldan gyi rgyal po gzhan zhig gi bu mo khab tu blangs par sras byung na rgyal srid du dbang bskur bar khas blangs pas dam bcas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sras sngon ma rnams spyugs pa rang rang gi sring mo dang bcas te chu bo skal ldan shing rta'i 'gram du ming sring lhan cig tu 'dus pa las shAkya'i brgyud ces grags pa yin no//\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de yang kun mkhyen nyi ma'i gnyen 'di nyid 'khrungs lor mkhas grub rnams kyi bzhed tshul mang du mchis kyang don grub ces pa sa lug dbyar zla 'bring po'i chu stod kyi nya ba'i tshes bco lnga'i dus su/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 yum gso sbyong la gnas pa'i lhums su glang po che'i phrug gu thal dkar gyi rnam par gyur nas zhugs/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 zla ba bcu phal cher lon pa legs par sbyar ba lha'i skad du ru dra zhes pa bsil ldan pa rnams drag por brjod pa/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya nag pa dbang thang dang bstun pa'i ming ging shing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bod yul du lcags pho spre'ura 'bod pa'i dpyid tha sa gas nya ba me tog can gyi zla ba/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya nag pa si yol du brjod pa'i yar ngo'i tshes bdun gyi res gza' 'od zer bdun pa dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 las skar rgyal la babs pa na lum+bi ni'i nags su sku bltams/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de nas lang tsho'i kun+da rnam par rgyas pa'i zil mngar gyi sgyu rtsal drug cu rtsa bzhi'i yal 'dab kyi yon tan kun la rang gir bgyis shing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 pad+mo'i snyems pa thogs pa'i ri dwags mig can gyi don yongs su tshang ba'i grags 'dzin ma dang sa mtsho ma sogs btsun mor dbang bskur te lo nyi shu rtsa dgu'i bar du nyi 'og gyi rgyal phran gnyis 'thung gi spyi bor 'jigs pa med pa'i rgyal thabs kyi bya ba'i skad gsang mthon pos rang byan du 'gying bar dbang thob na'ang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mchog gi sprul sku'i mdzad pa ston pa'i ched du/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dgung lo nyer dgu pa kun 'dzin zhes pa sa pho byi ba la rang byung gi sgo nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rab byung gi dngos po yongs su rdzogs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 lo drug tu dka' ba spyad pas lang tsho phun sum tshogs pa las gzhan du gyur pa na/\f3\fs44\i0\b0\ul0\cf0 
\f1\fs28\i0\b0\ul0 dga' mo dang dga' stobs mas ba stong gi 'o ma lan bcu drug gi bar du nying khur byas pa'i 'o thug sbrang rtsi dang sbyar ba'i kun tu bzang po'i mchod sprin gyis sku su war+Na'i mchod sdong nyi gzhon 'bum gyis 'khyud pa ltar gyur te rdo rje gdan du byang chub kyi shing drung na 'chi med bdag pos rtswa 'tshong gyi rnam par sprul te phul ba'i rtswa'i gdan la bzhugs nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 chu srin rgyal mtshan can sde dang bcas pa'i g.yul las rnam par rgyal ba'i ba dan nam mkha'i mtha' klas par bsgrengs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dgung lo so lnga pa rgyal ba zhes pa shing rta'i sa gas nya ba'i bco lnga'i skya rengs shar ba na ye shes mchog gi bdud rtsi mngon du mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /mdo las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de'i nyin mo zla ba sgra gcan gyis bzung ba dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sgra gcan 'dzin dang bdud rtsi zas kyi khye'u zhig kyang skyes par bshad pa'i gza' 'dzin gyi ri mo ni gza' gnas su gcig chu tshod so brgyad zla skar gyi skar gnas su bcu drug chu tshod stong pa sgra gcan gdong gi skar mar bcu drug chu tshod nyer dgu byung bas gza' 'dzin gyi ri mo ni tshad ma'i aA dar+sha gtsang ma'i ngos su gsal bar shar ba yin no/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par \f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de yang kun mkhyen nyi ma'i gnyen 'di nyid 'khrungs lor mkhas grub rnams kyi bzhed tshul mang du mchis kyang don grub ces pa sa lug dbyar zla 'bring po'i chu stod kyi nya ba'i tshes bco lnga'i dus su/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 yum gso sbyong la gnas pa'i lhums su glang po che'i phrug gu thal dkar gyi rnam par gyur nas zhugs/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 zla ba bcu phal cher lon pa legs par sbyar ba lha'i skad du ru dra zhes pa bsil ldan pa rnams drag por brjod pa/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya nag pa dbang thang dang bstun pa'i ming ging shing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bod 
yul du lcags pho spre'ura 'bod pa'i dpyid tha sa gas nya ba me tog can gyi zla ba/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya nag pa si yol du brjod pa'i yar ngo'i tshes bdun gyi res gza' 'od zer bdun pa dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 las skar rgyal la babs pa na lum+bi ni'i nags su sku bltams/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de nas lang tsho'i kun+da rnam par rgyas pa'i zil mngar gyi sgyu rtsal drug cu rtsa bzhi'i yal 'dab kyi yon tan kun la rang gir bgyis shing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 pad+mo'i snyems pa thogs pa'i ri dwags mig can gyi don yongs su tshang ba'i grags 'dzin ma dang sa mtsho ma sogs btsun mor dbang bskur te lo nyi shu rtsa dgu'i bar du nyi 'og gyi rgyal phran gnyis 'thung gi spyi bor 'jigs pa med pa'i rgyal thabs kyi bya ba'i skad gsang mthon pos rang byan du 'gying bar dbang thob na'ang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mchog gi sprul sku'i mdzad pa ston pa'i ched du/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dgung lo nyer dgu pa kun 'dzin zhes pa sa pho byi ba la rang byung gi sgo nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rab byung gi dngos po yongs su rdzogs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 lo drug tu dka' ba spyad pas lang tsho phun sum tshogs pa las gzhan du gyur pa na/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dga' mo dang dga' stobs mas ba stong gi 'o ma lan bcu drug gi bar du nying khur byas pa'i 'o thug sbrang rtsi dang sbyar ba'i kun tu bzang po'i mchod sprin gyis sku su war+Na'i mchod sdong nyi gzhon 'bum gyis 'khyud pa ltar gyur te rdo rje gdan du byang chub kyi shing drung na 'chi med bdag pos rtswa 'tshong gyi rnam par sprul te phul ba'i rtswa'i gdan la bzhugs nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 chu srin rgyal mtshan can sde dang bcas pa'i g.yul las rnam par rgyal ba'i ba dan nam mkha'i mtha' klas par bsgrengs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dgung lo so lnga pa rgyal ba zhes pa shing rta'i sa gas nya ba'i bco lnga'i skya rengs shar ba na ye 
shes mchog gi bdud rtsi mngon du mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /mdo las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de'i nyin mo zla ba sgra gcan gyis bzung ba dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sgra gcan 'dzin dang bdud rtsi zas kyi khye'u zhig kyang skyes par bshad pa'i gza' 'dzin gyi ri mo ni gza' gnas su gcig chu tshod so brgyad zla skar gyi skar gnas su bcu drug chu tshod stong pa sgra gcan gdong gi skar mar bcu drug chu tshod nyer dgu byung bas gza' 'dzin gyi ri mo ni tshad ma'i aA dar+sha gtsang ma'i ngos su gsal bar shar ba yin no/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de nas zhag zhe dgu'i bar du dam pa'i chos kyi bdud rtsi'i sgo brgya ma phye ba'i tshul bstan pas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'jig rten gyi byed po gser mngal can gyis skal pa mchog gi bzang po'i dpal dang ldan pa'i don du gsol ba la brten nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mchog dman gyi gdul bya so so'i blo dang 'tsham par zab rgyas chos kyi sgo glegs rnam par bkral nas theg pa che chung gi rigs can gyi yol go/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rang 'dod pa'i tshogs mtha' dag gcig tu 'jo ba'i ro zad mi shes pa mkha' khyab tu bro ba'i 'khor lo bskor te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mthar gyad kyi yul rtswa mchog gi grong du dgung lo gya gcig pa dpa' bo zhes pa lcags 'brug gi lor mi 'gyur ba'i bde ba chen po chos kyi sku'i mkha' klong du sna tshogs pa'i sprul sku'i zlos gar gyis rol pa'i chu 'dzin gzugs sku tha mi dad pa'i mdzad pa bstan to/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par \f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de nas zhag zhe dgu'i bar du dam pa'i chos kyi bdud rtsi'i sgo brgya ma phye ba'i tshul bstan pas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'jig rten gyi byed po gser mngal can gyis skal pa mchog gi bzang po'i dpal dang ldan pa'i don du gsol ba la brten nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mchog dman gyi gdul bya so so'i blo dang 'tsham par zab rgyas chos kyi sgo glegs rnam par bkral nas theg pa che chung gi rigs can gyi yol go/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rang 'dod pa'i tshogs mtha' dag gcig tu 'jo ba'i ro zad mi shes pa mkha' khyab tu bro ba'i 'khor lo bskor te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mthar gyad kyi yul rtswa mchog gi grong du dgung lo gya gcig pa dpa' bo zhes pa lcags 'brug gi lor mi 'gyur ba'i bde ba chen po chos kyi sku'i mkha' klong du sna tshogs pa'i sprul sku'i zlos gar gyis rol pa'i chu 'dzin gzugs sku tha mi dad pa'i mdzad pa bstan to/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar bstan pa rin po che yul dbus 'gyur nas char dus kyi dal 'gro'i klung rang babs su 'gyur ba'i gshis lugs bzhin tha grur khyab pa las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'gro ba'i bla ma shAkya seng ge gya gcig pa lcags 'brug lo nag nyar shriA d+hA n+ya ka Ta kar gsang ba kun gyi gan mdzod rnam par 'dzin pa'i mi'i dbang phyug zla ba bzang por sho lo ka stong phrag bcu gnyis kyi bdag nyid can gyi dpal dang po'i sangs rgyas dus kyi 'khor lo'i rgyud gsungs pa sham+b+ha lar spyan drangs nas ma la ya'i skyed tshal du sku gsungs thugs yongs su rdzogs pa'i dkyil 'khor gyi snang brnyan ye shes kyi gzi byin mngon par 'bar ba'i sgyu ma'i gar gyis rtsen pa'i 'dren byed kyi dga' ston gsar pa bskrun te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rdo rje theg pa'i bgo skal ka lA pa'i grong khyer gyi skye rgur 'gyed pa'i mu mtha' bral ba'i mdzad pa nas bzung 'jigs pa med pa'i gdong lnga'i khri la bgrod byed rtsibs stong 'khor lo'i ri mo mchog tu bkra ba'i chos rgyal rigs ldan sum cu rtsa gsum du byon pa mtha' dag gis phyi nang gzhan gsum sbyor ba'i gsang chen nA da'i sgra dbyangs kyi nor bu'i 'phyang 'phrul las ldan gyi rna bar spud par mdzad pa'i tha ma/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drag po'i 'khor lo can gyis dgung lo dgu bcu rtsa brgyad par kla klo'i dpung tshogs mtha' dag ming gi lhag mar byas nas snga na med pas sa chen po'i khor yug kun tu khyab par mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /de yang rigs ldan drag po dgung lo lnga bcu'i steng du kla klo bcom par bzhed pa ltar na/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 kla klo'i gnas tshad stong dang brgyad brgyar bshad pa las lo grangs bzhi bcu rtsa bdun tsam mi 'grig pa'i skyon yod cing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de dag rags rtsis su 'chad na zhib rtsis ni 'byung ba'i skabs med pas gzur gnas kyi shes ldan rnams soms shig\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar sham b+ha las mtshon te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 
ao rgyan/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bal yul/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya hor sogs su bstan pa byung tshul ni mtha' yas kyang mang gis dogs pas ma spros so/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par \f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar bstan pa rin po che yul dbus 'gyur nas char dus kyi dal 'gro'i klung rang babs su 'gyur ba'i gshis lugs bzhin tha grur khyab pa las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'gro ba'i bla ma shAkya seng ge gya gcig pa lcags 'brug lo nag nyar shrI d+hA n+ya ka Ta kar gsang ba kun gyi gan mdzod rnam par 'dzin pa'i mi'i dbang phyug zla ba bzang por sho lo ka stong phrag bcu gnyis kyi bdag nyid can gyi dpal dang po'i sangs rgyas dus kyi 'khor lo'i rgyud gsungs pa sham+b+ha lar spyan drangs nas ma la ya'i skyed tshal du sku gsungs thugs yongs su rdzogs pa'i dkyil 'khor gyi snang brnyan ye shes kyi gzi byin mngon par 'bar ba'i sgyu ma'i gar gyis rtsen pa'i 'dren byed kyi dga' ston gsar pa bskrun te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rdo rje theg pa'i bgo skal ka lA pa'i grong khyer gyi skye rgur 'gyed pa'i mu mtha' bral ba'i mdzad pa nas bzung 'jigs pa med pa'i gdong lnga'i khri la bgrod byed rtsibs stong 'khor lo'i ri mo mchog tu bkra ba'i chos rgyal rigs ldan sum cu rtsa gsum du byon pa mtha' dag gis phyi nang gzhan gsum sbyor ba'i gsang chen nA da'i sgra dbyangs kyi nor bu'i 'phyang 'phrul las ldan gyi rna bar spud par mdzad pa'i tha ma/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drag po'i 'khor lo can gyis dgung lo dgu bcu rtsa brgyad par kla klo'i dpung tshogs mtha' dag ming gi lhag mar byas nas snga na med pas sa chen po'i khor yug kun tu khyab par mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /de yang rigs ldan drag po dgung lo lnga bcu'i steng du kla klo bcom par bzhed pa ltar na/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 kla klo'i gnas tshad stong dang brgyad brgyar bshad pa las lo grangs bzhi bcu rtsa bdun tsam mi 'grig pa'i skyon yod cing /\f3\fs44\i0\b0\ul0\cf0 
\f1\fs28\i0\b0\ul0 de dag rags rtsis su 'chad na zhib rtsis ni 'byung ba'i skabs med pas gzur gnas kyi shes ldan rnams soms shig\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar sham b+ha las mtshon te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 ao rgyan/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bal yul/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya hor sogs su bstan pa byung tshul ni mtha' yas kyang mang gis dogs pas ma spros so/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par
\f4\fs24\i0\b0\ul0\cf0\par \f4\fs24\i0\b0\ul0\cf0\par
\pard\ql\f1\fs28\i0\b0\ul0\par \pard\ql\f1\fs28\i0\b0\ul0\par
\fs24\par \fs24\par

View file

@ -171,10 +171,10 @@ public class TibetanConverter implements FontConverterConstants {
out.println(""); out.println("");
out.println(""); out.println("");
out.println(" In --to... and --acip-to... modes, needs one argument, the name of the"); out.println(" In --to... and --acip-to... modes, needs one argument, the name of the");
out.println(" TibetanMachineWeb RTF"); out.println(" TibetanMachineWeb RTF file (for --to-wylie, --to-wylie-text, --to-acip-text,");
out.println(" file (for --to-wylie, --to-unicode, and --to-tibetan-machine) or the name of"); out.println(" --to-acip, --to-unicode, and --to-tibetan-machine) or the name of");
out.println(" the TibetanMachine RTF file (for --to-tibetan-machine-web) or the name of the"); out.println(" the TibetanMachine RTF file (for --to-tibetan-machine-web) or the name of the");
out.println(" ACIP text file (for --acip-to-unicode). Writes the"); out.println(" ACIP text file (for --acip-to-unicode or --acip-to-tmw). Writes the");
out.println(" result to standard output (after dealing with the curly brace problem if"); out.println(" result to standard output (after dealing with the curly brace problem if");
out.println(" the input is TibetanMachineWeb). Exit code is zero on success, 42 if some"); out.println(" the input is TibetanMachineWeb). Exit code is zero on success, 42 if some");
out.println(" glyphs couldn't be converted (in which case the output is just those glyphs),"); out.println(" glyphs couldn't be converted (in which case the output is just those glyphs),");
@ -364,14 +364,14 @@ public class TibetanConverter implements FontConverterConstants {
+ ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0) + ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0)
== 1); == 1);
long numAttemptedReplacements[] = new long[] { 0 }; long numAttemptedReplacements[] = new long[] { 0 };
if (TMW_TO_WYLIE == ct) { if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) {
// Convert to THDL Wylie: // Convert to THDL Wylie:
if (!tdoc.toWylie(0, if (!tdoc.toWylie(0,
tdoc.getLength(), tdoc.getLength(),
numAttemptedReplacements)) { numAttemptedReplacements)) {
exitCode = 44; exitCode = 44;
} }
} else if (TMW_TO_ACIP == ct) { } else if (TMW_TO_ACIP == ct || TMW_TO_ACIP_TEXT == ct) {
// Convert to ACIP: // Convert to ACIP:
if (!tdoc.toACIP(0, if (!tdoc.toACIP(0,
tdoc.getLength(), tdoc.getLength(),
@ -411,7 +411,10 @@ public class TibetanConverter implements FontConverterConstants {
// Write to standard output the result: // Write to standard output the result:
if (TMW_TO_WYLIE_TEXT == ct || TMW_TO_ACIP_TEXT == ct) { if (TMW_TO_WYLIE_TEXT == ct || TMW_TO_ACIP_TEXT == ct) {
try { try {
tdoc.writeTextOutput(new BufferedWriter(new OutputStreamWriter(out))); BufferedWriter bw
= new BufferedWriter(new OutputStreamWriter(out));
tdoc.writeTextOutput(bw);
bw.flush();
} catch (IOException e) { } catch (IOException e) {
exitCode = 40; exitCode = 40;
} }

View file

@ -44,11 +44,18 @@ public class Manipulate
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
} }
/** Returns null on error. */
public static String wylieToAcip(String palabra) public static String wylieToAcip(String palabra)
{ {
// DLC FIXME: for unknown things, return null. // DLC FIXME: for unknown things, return null.
if (palabra.equals("@#")) return "*"; if (palabra.equals("@#")) return "#";
if (palabra.equals("!")) return "`";
if (palabra.equals("b+h")) return "BH";
if (palabra.equals("d+h")) return "DH";
if (palabra.equals("X")) return null;
if (palabra.equals("iA")) return null;
if (palabra.equals("ai")) return "EE";
if (palabra.equals("au")) return "OO";
if (palabra.equals("$")) return null;
if (palabra.startsWith("@") || palabra.startsWith("#")) if (palabra.startsWith("@") || palabra.startsWith("#"))
return null; // we can't convert this in isolation! We need context. return null; // we can't convert this in isolation! We need context.
char []caract; char []caract;
@ -93,7 +100,7 @@ public class Manipulate
nuevaPalabra = replace(nuevaPalabra, "u", "'U"); nuevaPalabra = replace(nuevaPalabra, "u", "'U");
nuevaPalabra = replace(nuevaPalabra, "-I", "i"); nuevaPalabra = replace(nuevaPalabra, "-I", "i");
nuevaPalabra = replace(nuevaPalabra, "/", ","); nuevaPalabra = replace(nuevaPalabra, "/", ",");
nuevaPalabra = replace(nuevaPalabra, "_", " "); nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = fixWazur(nuevaPalabra); nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra; return nuevaPalabra;
} }

View file

@ -168,7 +168,7 @@ public final class DuffCode {
* recursion (manifesting as a StackOverflowError)) */ * recursion (manifesting as a StackOverflowError)) */
public String toString(boolean TMW) { public String toString(boolean TMW) {
boolean[] err = new boolean[] { false }; boolean[] err = new boolean[] { false };
return "<duffcode font=" return "<glyph font="
+ (TMW + (TMW
? TibetanMachineWeb.tmwFontNames ? TibetanMachineWeb.tmwFontNames
: TibetanMachineWeb.tmFontNames)[fontNum] : TibetanMachineWeb.tmFontNames)[fontNum]

View file

@ -73,8 +73,14 @@ public class TGCPair {
vowelWylie = null; vowelWylie = null;
} }
public String getWylie() { public String getWylie() {
return getWylie(false);
}
public String getWylie(boolean appendaged) {
StringBuffer b = new StringBuffer(); StringBuffer b = new StringBuffer();
if (consonantWylie != null) { if (consonantWylie != null) {
if (appendaged && !"'".equals(consonantWylie))
b.append("a"); // pa'am... we want 'am, not 'm; 'ang, not 'ng.
// we may have {p-y}, but the user wants to see {py}. // we may have {p-y}, but the user wants to see {py}.
for (int i = 0; i < consonantWylie.length(); i++) { for (int i = 0; i < consonantWylie.length(); i++) {
char ch = consonantWylie.charAt(i); char ch = consonantWylie.charAt(i);
@ -87,26 +93,35 @@ public class TGCPair {
return b.toString(); return b.toString();
} }
public String getACIP() { public String getACIP() {
return getACIP(false);
}
public String getACIP(boolean appendaged) {
// DLC FIXME: has the EWTS change affected Manipulate.acipToWylie? // DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
StringBuffer b = new StringBuffer(); StringBuffer b = new StringBuffer();
if (consonantWylie != null) { if (consonantWylie != null) {
String consonantACIP // DLC FIXME can KAsh occur? String consonantACIP
= org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie); = org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(consonantWylie);
if (null == consonantACIP) throw new Error("how?"); if (null == consonantACIP) {
// System.out.println("DLC: Wylie=" + consonantWylie + ", ACIP=" + consonantACIP); return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie);
// we may have {P-Y}, but the user wants to see {PY}. } else {
for (int i = 0; i < consonantACIP.length(); i++) { if (appendaged && !"'".equals(consonantWylie))
char ch = consonantACIP.charAt(i); b.append("A"); // PA'AM
if ('-' != ch) // we may have {P-Y}, but the user wants to see {PY}.
b.append(ch); for (int i = 0; i < consonantACIP.length(); i++) {
char ch = consonantACIP.charAt(i);
if ('-' != ch)
b.append(ch);
}
} }
} }
if (vowelWylie != null) { if (vowelWylie != null) {
String vowelACIP // DLC FIXME look for exceptions String vowelACIP
= org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie); = org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(vowelWylie);
// System.out.println("DLC: Wylie=" + vowelWylie + ", ACIP=" + vowelACIP); if (null == vowelACIP) {
if (null == vowelACIP) throw new Error("how?"); return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + vowelWylie);
b.append(vowelACIP); } else {
b.append(vowelACIP);
}
} }
return b.toString(); return b.toString();
} }
@ -150,6 +165,12 @@ public class TGCPair {
} }
this.consonantWylie = consonantWylie; this.consonantWylie = consonantWylie;
if (null != vowelWylie) {
if (vowelWylie.equals("iA") || vowelWylie.equals("Ai"))
vowelWylie = "I";
if (vowelWylie.equals("uA") || vowelWylie.equals("Au"))
vowelWylie = "U";
}
this.vowelWylie = vowelWylie; this.vowelWylie = vowelWylie;
this.classification = realClassification; this.classification = realClassification;
} }

View file

@ -360,7 +360,7 @@ public class TibTextUtils implements THDLWylieConstants {
else else
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
glyphs.addAll(getBindu(dc)); getBindu(glyphs, dc);
} }
else { else {
@ -477,303 +477,366 @@ public class TibTextUtils implements THDLWylieConstants {
} }
/** /**
* Gets the bindu sequence for a given context. * Gets the bindu sequence for a given context. In the
* In the TibetanMachineWeb fonts, bindu (anusvara) is realized * TibetanMachineWeb fonts, bindu (anusvara) is realized differently
* differently depending on which vowel it attaches to. Although * depending on which vowel it attaches to. Although the default bindu
* the default bindu glyph is affixed to consonants and subscript vowels, * glyph is affixed to consonants and subscript vowels, for superscript
* for superscript vowels (i, e, o, etc), there is a single glyph * vowels (i, e, o, etc), there is a single glyph which merges the
* which merges the bindu and that vowel together. When you pass this * bindu and that vowel together. When you pass this method a glyph
* method a glyph context, it will return a List of glyphs which * context and a list, it will append to that list glyphs which will either consist
* will either consist of the original glyph followed by the default * of the original glyph followed by the default bindu glyph, or a
* bindu glyph, or a composite vowel+bindu glyph. * composite vowel+bindu glyph. Note that there is only one glyph in
* Note that there is only one glyph in the context. This means that * the context. This means that bindus will not affix properly if
* bindus will not affix properly if superscript vowels are allowed to directly * superscript vowels are allowed to directly precede subscript vowels
* precede subscript vowels (e.g. pou). * (e.g. pou).
* @param dc the DuffCode of the glyph you * @param list a List of DuffCode glyphs to which will be appended the
* want to attach a bindu to * original dc (if non-null) as well as a bindu, or the one glyph that
* @return a List of DuffCode glyphs that include the * represents both
* original dc, as well as a bindu * @param dc the DuffCode of the glyph you want to attach a bindu to,
*/ * or null */
public static List getBindu(DuffCode dc) { public static void getBindu(List list, DuffCode dc) {
List bindus = new ArrayList(); if (null == dc) {
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
if (null == dc) { } else {
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU))); if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
return bindus; list.add(dc);
} list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
} else {
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) { list.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
bindus.add(dc); }
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU))); }
return bindus; }
}
bindus.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
return bindus;
}
/** /**
* Gets the vowel sequence for a given vowel in a given context. * Gets the vowel sequence for a given vowel in a given context. Given
* Given a context, this method affixes a vowel and returns the * a context, this method affixes a vowel and returns the context (iff
* context plus the vowel. Generally, it is enough to provide just * context_added[0] is false) plus the vowel. Generally, it is enough
* one glyph for context. * to provide just one glyph for context.
* @param context the glyph preceding the vowel you want to affix * @param context the glyph preceding the vowel you want to affix
* @param vowel the vowel you want to affix, in Wylie * @param vowel the vowel you want to affix, in Wylie
* @param context_added an array of one boolean, an input/output
* parameter that, if true, means that only the vowel will be added to
* l, not the context, and if false, means that the context and the
* vowel will be added and that context_added[0] will be updated to be
* true
* @return a List of glyphs equal to the vowel in context * @return a List of glyphs equal to the vowel in context
*/ * @throws IllegalArgumentException if the given combination is not
* supported */
public static void getVowel(List l, DuffCode context, String vowel, boolean context_added[]) {
getVowel(l, null, context, vowel, context_added);
}
/** Wrapper that calls for adding context to l. */
public static void getVowel(List l, DuffCode context, String vowel) { public static void getVowel(List l, DuffCode context, String vowel) {
getVowel(l, null, context, vowel); getVowel(l, null, context, vowel, new boolean[] { false });
}
/** Wrapper that calls for adding context to l. */
public static void getVowel(List l, DuffCode context_1, DuffCode context_2, String vowel) {
getVowel(l, context_1, context_2, vowel, new boolean[] { false });
} }
/** /**
* Gets the vowel sequence for a given vowel in a given context and * Gets the vowel sequence for a given vowel in a given context and
* appends it to l. Given a context, this method affixes a vowel and * appends it to l. Given a context, this method affixes a vowel and
* appends the context plus the vowel to l. Since the choice of vowel * appends the context (iff context_added[0] is false) plus the vowel
* glyph depends on the consonant to which it is attached, generally it * to l. Since the choice of vowel glyph depends on the consonant to
* is enough to provide just the immediately preceding * which it is attached, generally it is enough to provide just the
* context. However, in some cases, double vowels are allowed - for * immediately preceding context. However, in some cases, double vowels
* example 'buo'. To find the correct glyph for 'o', we need 'b' in * are allowed - for example 'buo'. To find the correct glyph for 'o',
* this case, not 'u'. Note also that some Extended Wylie vowels * we need 'b' in this case, not 'u'. Note also that some Extended
* correspond to multiple glyphs in TibetanMachineWeb. For example, the * Wylie vowels correspond to multiple glyphs in TibetanMachineWeb. For
* vowel I consists of both an achung and a reverse gigu. All required * example, the vowel I consists of both an achung and a reverse
* glyphs are appended to l. * gigu. All required glyphs are appended to l.
* @param context_1 the glyph occurring two glyphs before the vowel you * @param context_1 the glyph occurring two glyphs before the vowel you
* want to affix * want to affix
* @param context_2 the glyph immediately before the vowel you want to * @param context_2 the glyph immediately before the vowel you want to
* affix * affix
* @param vowel the vowel you want to affix, in Wylie */ * @param vowel the vowel you want to affix, in Wylie
* @param context_added an array of one boolean, an input/output
* parameter that, if true, means that only the vowel will be added to
* l, not the context, and if false, means that the context and the
* vowel will be added and that context_added[0] will be updated to be
* true
* @throws IllegalArgumentException if the given combination is not
* supported */
public static void getVowel(List l, DuffCode context_1, DuffCode context_2, String vowel) { public static void getVowel(List l, DuffCode context_1, DuffCode context_2,
//this vowel doesn't correspond to a glyph - String vowel, boolean context_added[])
//so you just return the original context throws IllegalArgumentException
{
//this vowel doesn't correspond to a glyph -
//so you just return the original context
if ( vowel.equals(WYLIE_aVOWEL) || if (vowel.equals(WYLIE_aVOWEL)
TibetanMachineWeb.isTopVowel(context_2)) { || TibetanMachineWeb.isTopVowel(context_2)) {
if (context_1 != null) if (TibetanMachineWeb.isTopVowel(context_2))
l.add(context_1); throw new IllegalArgumentException("dropping vowels is bad");
if (!context_added[0]) {
context_added[0] = true;
if (context_1 != null)
l.add(context_1);
l.add(context_2); l.add(context_2);
return; }
} return;
}
//first, the three easiest cases: ai, au, and <i //first, the three easiest cases: ai, au, and <i
//these vowels have one invariant form - therefore, //these vowels have one invariant form - therefore,
//dc_context is just returned along with that form //dc_context is just returned along with that form
if (vowel.equals(ai_VOWEL)) { if (vowel.equals(ai_VOWEL)) {
if (context_1 != null) if (!context_added[0]) {
l.add(context_1); context_added[0] = true;
if (context_1 != null)
l.add(context_1);
l.add(context_2); l.add(context_2);
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL); }
l.add(dc_v[TibetanMachineWeb.TMW]); DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL);
return; l.add(dc_v[TibetanMachineWeb.TMW]);
} return;
}
if (vowel.equals(au_VOWEL)) { if (vowel.equals(au_VOWEL)) {
if (context_1 != null) if (!context_added[0]) {
l.add(context_1); context_added[0] = true;
if (context_1 != null)
l.add(context_1);
l.add(context_2); l.add(context_2);
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL); }
l.add(dc_v[TibetanMachineWeb.TMW]); DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL);
return; l.add(dc_v[TibetanMachineWeb.TMW]);
} return;
}
if (vowel.equals(reverse_i_VOWEL)) { if (vowel.equals(reverse_i_VOWEL)) {
if (context_1 != null) if (!context_added[0]) {
l.add(context_1); context_added[0] = true;
if (context_1 != null)
l.add(context_1);
l.add(context_2); l.add(context_2);
}
if (!TibetanMachineWeb.isTopVowel(context_2)) {
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
l.add(dc_v[TibetanMachineWeb.TMW]);
} else throw new IllegalArgumentException("dropping vowels is bad");
if (!TibetanMachineWeb.isTopVowel(context_2)) { return;
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL); }
l.add(dc_v[TibetanMachineWeb.TMW]);
}
return; //second, the vowels i, e, and o
} //these vowels have many different glyphs each,
//whose correct selection depends on the
//preceding context. therefore, dc_context is
//returned along with the vowel appropriate to
//that context
//second, the vowels i, e, and o if (vowel.equals(i_VOWEL)) {
//these vowels have many different glyphs each, String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
//whose correct selection depends on the DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
//preceding context. therefore, dc_context is if (null == dc_v && null != context_1) {
//returned along with the vowel appropriate to hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
//that context dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
}
if (vowel.equals(i_VOWEL)) { if (!context_added[0]) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); context_added[0] = true;
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i); if (context_1 != null)
if (null == dc_v && null != context_1) { l.add(context_1);
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
}
if (context_1 != null) l.add(context_2);
l.add(context_1); }
if (null != dc_v)
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
l.add(context_2); return;
}
// DLC perfect TMW->Wylie wouldn't produce o'i for an input file containing merely TMW9.61 -- it would produce \u0f7c,\u0f60,\u0f72 -- round-trip shows why.
if (null != dc_v) if (vowel.equals(e_VOWEL)) {
l.add(dc_v); String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
}
return; if (!context_added[0]) {
} context_added[0] = true;
if (context_1 != null)
l.add(context_1);
if (vowel.equals(e_VOWEL)) { l.add(context_2);
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); }
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
if (null == dc_v && null != context_1) { if (null != dc_v)
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1); l.add(dc_v);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e); else throw new IllegalArgumentException("dropping vowels is bad");
}
if (context_1 != null) return;
l.add(context_1); }
l.add(context_2); if (vowel.equals(o_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
}
if (null != dc_v) if (!context_added[0]) {
l.add(dc_v); context_added[0] = true;
if (context_1 != null)
l.add(context_1);
return; l.add(context_2);
} }
if (vowel.equals(o_VOWEL)) { if (null != dc_v)
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); l.add(dc_v);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o); else throw new IllegalArgumentException("dropping vowels is bad");
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
}
if (context_1 != null) return;
l.add(context_1); }
l.add(context_2); //next come the vowels u, A, and U
//these three vowels are grouped together because they all
//can cause the preceding context to change. in particular,
//both u and A cannot be affixed to ordinary k or g, but
//rather the shortened versions of k and g - therefore,
if (null != dc_v) if (vowel.equals(u_VOWEL)) {
l.add(dc_v); String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
return; if (!context_added[0]) {
} context_added[0] = true;
if (null != context_1)
l.add(context_1);
//next come the vowels u, A, and U if (null == halfHeight)
//these three vowels are grouped together because they all l.add(context_2);
//can cause the preceding context to change. in particular, else
//both u and A cannot be affixed to ordinary k or g, but l.add(halfHeight);
//rather the shortened versions of k and g - therefore, }
if (null != dc_v)
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
if (vowel.equals(u_VOWEL)) { return;
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); }
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
if (null != context_1) if (vowel.equals(A_VOWEL)) {
l.add(context_1); String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
if (null == halfHeight) if (!context_added[0]) {
l.add(context_2); context_added[0] = true;
else if (null != context_1)
l.add(halfHeight); l.add(context_1);
if (null != dc_v) if (null == halfHeight)
l.add(dc_v); l.add(context_2);
else
l.add(halfHeight);
}
if (null != dc_v)
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
return; return;
} }
if (vowel.equals(A_VOWEL)) { if (vowel.equals(U_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context); DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A); DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
if (null != context_1) if (!context_added[0]) {
l.add(context_1); context_added[0] = true;
if (null != context_1)
l.add(context_1);
if (null == halfHeight) if (null == halfHeight)
l.add(context_2); l.add(context_2);
else else
l.add(halfHeight); l.add(halfHeight);
}
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
if (null != dc_v) return;
}
l.add(dc_v); //finally, the vowels I and <I
//these vowels are unique in that they both
//require a change from the previous character,
//and consist of two glyphs themselves
return; if (vowel.equals(I_VOWEL)) {
} String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
if (vowel.equals(U_VOWEL)) { if (!context_added[0]) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); context_added[0] = true;
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context); if (null != context_1)
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U); l.add(context_1);
if (null != context_1) if (null == halfHeight)
l.add(context_1); l.add(context_2);
else
l.add(halfHeight);
}
if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub);
l.add(dc_v_sup);
} else throw new IllegalArgumentException("dropping vowels is bad");
if (null == halfHeight) return;
l.add(context_2); }
else
l.add(halfHeight);
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2)) if (vowel.equals(reverse_I_VOWEL)) {
l.add(dc_v); String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
return; if (!context_added[0]) {
} context_added[0] = true;
if (null != context_1)
l.add(context_1);
//finally, the vowels I and <I if (null == halfHeight)
//these vowels are unique in that they both l.add(context_2);
//require a change from the previous character, else
//and consist of two glyphs themselves l.add(halfHeight);
}
if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub);
l.add(dc_v_sup);
} else throw new IllegalArgumentException("dropping vowels is bad");
if (vowel.equals(I_VOWEL)) { return;
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); }
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
if (null != context_1) throw new IllegalArgumentException("bad vowel " + vowel);
l.add(context_1); }
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub);
l.add(dc_v_sup);
}
return;
}
if (vowel.equals(reverse_I_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
if (null != context_1)
l.add(context_1);
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub);
l.add(dc_v_sup);
}
return;
}
throw new Error("DLC can this happen? " + vowel);
}
/** /**
* True if you want TibetanMachineWeb-to-Extended-Wylie conversion * True if you want TibetanMachineWeb-to-Extended-Wylie conversion
@ -844,7 +907,7 @@ public class TibTextUtils implements THDLWylieConstants {
// DLC FIXME: {H}, U+0F7F, is part of a grapheme cluster! // DLC FIXME: {H}, U+0F7F, is part of a grapheme cluster!
// David Chapman and I both need a comprehensive list of these // David Chapman and I both need a comprehensive list of these
// guys. // guys. Get it from Unicode 4.0 spec?
/** Scans the glyphs in glyphList and creates the returned list of /** Scans the glyphs in glyphList and creates the returned list of
grapheme clusters based on them. A grapheme cluster is a grapheme clusters based on them. A grapheme cluster is a
consonant or consonant stack with optional adornment or a consonant or consonant stack with optional adornment or a
@ -889,6 +952,11 @@ public class TibTextUtils implements THDLWylieConstants {
} else if (TibetanMachineWeb.isWylieAdornmentAndContainsVowel(wylie) } else if (TibetanMachineWeb.isWylieAdornmentAndContainsVowel(wylie)
|| TibetanMachineWeb.isWylieAdornment(wylie)) { || TibetanMachineWeb.isWylieAdornment(wylie)) {
buildingUpVowel.append(wylie); buildingUpVowel.append(wylie);
// DLC FIXME: I bet three or four vowels together
// breaks TMW->ACIP and TMW->EWTS. Test it. When it
// does, revamp TGCPair to have a set of vowels. The
// output order should be consistent with the
// Unicode-imposed order on vowels.
} else { } else {
// number or weird thing: // number or weird thing:
@ -1134,12 +1202,6 @@ public class TibTextUtils implements THDLWylieConstants {
if (isAppendageNonVowelWylie(wylie)) { if (isAppendageNonVowelWylie(wylie)) {
candidateType candidateType
= candidateType.substring("maybe-".length()).intern(); = candidateType.substring("maybe-".length()).intern();
// So that we get 'am, not 'm; 'ang, not 'ng:
// FIXME: kludge: weird place to do this.
// pa'am, not pa'm is what we want, sure,
// but doing this here is ugly.
tp.setWylie(WYLIE_aVOWEL + tp.getWylie());
} else { } else {
if (null != warnings) if (null != warnings)
warnings.append("Found a tsheg bar that has an achung (" + ACHUNG + ") tacked on, followed by some other thing whose wylie is " + wylie + "\n"); warnings.append("Found a tsheg bar that has an achung (" + ACHUNG + ") tacked on, followed by some other thing whose wylie is " + wylie + "\n");
@ -1264,7 +1326,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie) if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) { || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie)); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
} else { } else if (i + 1 < sz) {
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
&& TGCPair.SANSKRIT_WITH_VOWEL != cls) && TGCPair.SANSKRIT_WITH_VOWEL != cls)
translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-'); translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
@ -1277,7 +1339,8 @@ public class TibTextUtils implements THDLWylieConstants {
int leftover = sz + 1; int leftover = sz + 1;
// Appendaged vs. not appendaged? it affects nothing at // Appendaged vs. not appendaged? it affects nothing at
// this stage. // this stage except for pa'm vs. pa'am.
boolean appendaged = (candidateType.startsWith("appendaged-"));
candidateType = getCandidateTypeModuloAppendage(candidateType); candidateType = getCandidateTypeModuloAppendage(candidateType);
if ("prefix/root-root/suffix-suffix/postsuffix" == candidateType) { if ("prefix/root-root/suffix-suffix/postsuffix" == candidateType) {
@ -1433,7 +1496,9 @@ public class TibTextUtils implements THDLWylieConstants {
// append the wylie/ACIP left over: // append the wylie/ACIP left over:
for (int i = leftover; i < sz; i++) { for (int i = leftover; i < sz; i++) {
TGCPair tp = (TGCPair)gcs.get(i); TGCPair tp = (TGCPair)gcs.get(i);
translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP()); translitBuffer.append(EWTSNotACIP
? tp.getWylie(appendaged)
: tp.getACIP(appendaged));
} }
} }
} }
@ -1468,6 +1533,7 @@ public class TibTextUtils implements THDLWylieConstants {
ArrayList glyphList = new ArrayList(); ArrayList glyphList = new ArrayList();
StringBuffer translitBuffer = new StringBuffer(); StringBuffer translitBuffer = new StringBuffer();
// DLC FIXME: " " should become " ", and test with ACIP # and *.
for (int i=0; i<dcs.length; i++) { for (int i=0; i<dcs.length; i++) {
char ch = dcs[i].getCharacter(); char ch = dcs[i].getCharacter();
int k = dcs[i].getCharNum(); int k = dcs[i].getCharNum();
@ -1482,6 +1548,14 @@ public class TibTextUtils implements THDLWylieConstants {
warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first."); warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
} }
// In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
// indicates a real line break.
if (!EWTSNotACIP && '\n' == ch) {
if (i > 0 && dcs[i - 1].getCharacter() == '\r')
translitBuffer.append("\r\n");
else
translitBuffer.append(ch);
}
translitBuffer.append(ch); translitBuffer.append(ch);
} else { } else {
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch); String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);

View file

@ -994,6 +994,8 @@ private static boolean isAmbHelper(String y) {
* @return true if x + y is ambiguous in the Extended Wylie * @return true if x + y is ambiguous in the Extended Wylie
* transliteration, false if not */ * transliteration, false if not */
public static boolean isAmbiguousWylie(String x, String y) { public static boolean isAmbiguousWylie(String x, String y) {
// DLC NOW: BDE vs. B+DE -- TMW->ACIP should give B+DE to be very friendly to machines.
// What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.? // What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
// Some would say it doesn't matter, because that's illegal. wa // Some would say it doesn't matter, because that's illegal. wa
// doesn't take any prefixes. But I want even illegal stuff to // doesn't take any prefixes. But I want even illegal stuff to
@ -1719,19 +1721,21 @@ private static String acipForGlyph(String hashKey) {
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change * documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change
* them both when you change this. */ * them both when you change this. */
private static String getTMWToWylieErrorString(DuffCode dc) { private static String getTMWToWylieErrorString(DuffCode dc) {
return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert "
+ dc.toString(true) + dc.toString(true)
+ " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; + " to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>";
} }
/** Error that appears in a document when some TMW cannot be /** Error that appears in a document when some TMW cannot be
* transcribed in ACIP. This error message is * transcribed in ACIP. This error message is
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET), so change * documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET), so change
* them both when you change this. */ * them both when you change this. */
static String getTMWToACIPErrorString(String it) {
return "[# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert " + it + " to ACIP. Please transcribe this yourself.]";
}
private static String getTMWToACIPErrorString(DuffCode dc) { private static String getTMWToACIPErrorString(DuffCode dc) {
return "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode " return getTMWToACIPErrorString(dc.toString(true));
+ dc.toString(true)
+ " to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
} }
/** /**

View file

@ -446,12 +446,18 @@ public class ACIPConverter {
if (!lastGuyWasNonPunct if (!lastGuyWasNonPunct
|| (null != lastGuy || (null != lastGuy
&& (lpl = lastGuy.get(lastGuy.size() - 1)).size() == 1 && (lpl = lastGuy.get(lastGuy.size() - 1)).size() == 1
&& lpl.get(0).getLeft().equals("G") // "GU ," and "KU ," each have
&& // it's (G . anything) // tshegs, but "GI ," and "KI
// followed by some number // ," each have a Tibetan
// of spaces (at least one, // space.
// this one) and then a && ((lpl.get(0).getLeft().equals("G")
// comma: || lpl.get(0).getLeft().equals("K"))
&& (lpl.get(0).getRight().indexOf('U') < 0))
&&
// it's (G . anything)
// followed by some number of
// spaces (at least one, this
// one) and then a comma:
peekaheadFindsSpacesAndComma(scan, i+1))) { peekaheadFindsSpacesAndComma(scan, i+1))) {
if (null != writer) { if (null != writer) {
unicode = " "; unicode = " ";

View file

@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
import java.util.HashSet; import java.util.HashSet;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.StringTokenizer;
import java.util.List; import java.util.List;
import org.thdl.tib.text.DuffCode; import org.thdl.tib.text.DuffCode;
@ -30,7 +31,7 @@ import org.thdl.tib.text.TibTextUtils;
/** Canonizes some facts regarding the ACIP transcription system. /** Canonizes some facts regarding the ACIP transcription system.
* @author David Chandler */ * @author David Chandler */
class ACIPRules { public class ACIPRules {
/** {Ksh}, the longest consonant, has 3 characters, so this is /** {Ksh}, the longest consonant, has 3 characters, so this is
* three. */ * three. */
public static int MAX_CONSONANT_LENGTH = 3; public static int MAX_CONSONANT_LENGTH = 3;
@ -66,7 +67,7 @@ class ACIPRules {
// DLC I'm on my own with 'O and 'E and 'OO and 'EE, but // DLC I'm on my own with 'O and 'E and 'OO and 'EE, but
// GANG'O appears and I wonder... so here they are. It's // GANG'O appears and I wonder... so here they are. It's
// consistent with 'I and 'A and 'U, at least: all the vowels // consistent with 'I and 'A and 'U, at least: all the vowels
// may appear as K'vowel. // may appear as K'vowel. DLC FIMXE: ask.
acipVowels.add(baseVowels[i][0]); acipVowels.add(baseVowels[i][0]);
acipVowels.add('\'' + baseVowels[i][0]); acipVowels.add('\'' + baseVowels[i][0]);
@ -140,6 +141,43 @@ class ACIPRules {
return consonants.contains(acip); return consonants.contains(acip);
} }
private static HashMap wylieToACIP = null;
/** Returns the ACIP transliteration corresponding to the THDL
    Extended Wylie <em>atom</em> EWTS, or null if EWTS is null or is
    not recognized.  If EWTS is not itself a known atom, it is split
    on '-' and '+' and each piece is looked up separately; the
    delimiters are carried over to the result unchanged, and a
    single unrecognized piece makes the whole result null.
    @param EWTS the Extended Wylie text to translate
    @return the ACIP equivalent, or null if there is none */
public static String getACIPForEWTS(String EWTS) {
    // These calls are made only for their side effect: each getter
    // registers its mappings via putMapping, which fills in
    // wylieToACIP.  The null argument is just a dummy lookup key.
    getWylieForACIPConsonant(null);
    getWylieForACIPOther(null);
    getWylieForACIPVowel(null);

    // A null input cannot be recognized; without this guard we
    // would throw a NullPointerException at EWTS.length() below.
    if (null == EWTS) return null;

    String ans = (String)wylieToACIP.get(EWTS);
    if (null != ans) return ans;

    // Not a single known atom: translate it piece by piece,
    // keeping the '-' and '+' separators (returnDelims is true).
    StringBuffer finalAns = new StringBuffer(EWTS.length());
    StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
    while (sTok.hasMoreTokens()) {
        String tok = sTok.nextToken();
        String part;
        if (tok.equals("-") || tok.equals("+"))
            part = tok;
        else
            part = (String)wylieToACIP.get(tok);
        if (null == part) return null;
        finalAns.append(part);
    }
    return finalAns.toString();
}
/** Records one ACIP/EWTS pair in both directions: ACIP->EWTS goes
    into toWylie, and EWTS->ACIP goes into {@link #wylieToACIP},
    which is created (and seeded with the "_" oddball) the first
    time it is needed. */
private static void putMapping(HashMap toWylie, String ACIP, String EWTS) {
    // Forward direction first.
    toWylie.put(ACIP, EWTS);

    // Reverse direction, creating the shared table on first use.
    HashMap reverse = wylieToACIP;
    if (reverse == null) {
        reverse = new HashMap(75);
        reverse.put("_", " "); // oddball.
        wylieToACIP = reverse;
    }
    reverse.put(EWTS, ACIP);
}
private static HashMap acipConsonant2wylie = null; private static HashMap acipConsonant2wylie = null;
/** Returns the EWTS corresponding to the given ACIP consonant /** Returns the EWTS corresponding to the given ACIP consonant
* (without the "A" vowel). Returns null if there is no such * (without the "A" vowel). Returns null if there is no such
@ -149,52 +187,52 @@ class ACIPRules {
acipConsonant2wylie = new HashMap(37); acipConsonant2wylie = new HashMap(37);
// oddball: // oddball:
acipConsonant2wylie.put("V", "w"); putMapping(acipConsonant2wylie, "V", "w");
// more oddballs: // more oddballs:
acipConsonant2wylie.put("DH", "d+h"); putMapping(acipConsonant2wylie, "DH", "d+h");
acipConsonant2wylie.put("BH", "b+h"); putMapping(acipConsonant2wylie, "BH", "b+h");
acipConsonant2wylie.put("dH", "D+h"); putMapping(acipConsonant2wylie, "dH", "D+h");
acipConsonant2wylie.put("DZH", "dz+h"); putMapping(acipConsonant2wylie, "DZH", "dz+h");
acipConsonant2wylie.put("Ksh", "k+Sh"); putMapping(acipConsonant2wylie, "Ksh", "k+Sh");
acipConsonant2wylie.put("GH", "g+h"); putMapping(acipConsonant2wylie, "GH", "g+h");
acipConsonant2wylie.put("K", "k"); putMapping(acipConsonant2wylie, "K", "k");
acipConsonant2wylie.put("KH", "kh"); putMapping(acipConsonant2wylie, "KH", "kh");
acipConsonant2wylie.put("G", "g"); putMapping(acipConsonant2wylie, "G", "g");
acipConsonant2wylie.put("NG", "ng"); putMapping(acipConsonant2wylie, "NG", "ng");
acipConsonant2wylie.put("C", "c"); putMapping(acipConsonant2wylie, "C", "c");
acipConsonant2wylie.put("CH", "ch"); putMapping(acipConsonant2wylie, "CH", "ch");
acipConsonant2wylie.put("J", "j"); putMapping(acipConsonant2wylie, "J", "j");
acipConsonant2wylie.put("NY", "ny"); putMapping(acipConsonant2wylie, "NY", "ny");
acipConsonant2wylie.put("T", "t"); putMapping(acipConsonant2wylie, "T", "t");
acipConsonant2wylie.put("TH", "th"); putMapping(acipConsonant2wylie, "TH", "th");
acipConsonant2wylie.put("D", "d"); putMapping(acipConsonant2wylie, "D", "d");
acipConsonant2wylie.put("N", "n"); putMapping(acipConsonant2wylie, "N", "n");
acipConsonant2wylie.put("P", "p"); putMapping(acipConsonant2wylie, "P", "p");
acipConsonant2wylie.put("PH", "ph"); putMapping(acipConsonant2wylie, "PH", "ph");
acipConsonant2wylie.put("B", "b"); putMapping(acipConsonant2wylie, "B", "b");
acipConsonant2wylie.put("M", "m"); putMapping(acipConsonant2wylie, "M", "m");
acipConsonant2wylie.put("TZ", "ts"); putMapping(acipConsonant2wylie, "TZ", "ts");
acipConsonant2wylie.put("TS", "tsh"); putMapping(acipConsonant2wylie, "TS", "tsh");
acipConsonant2wylie.put("DZ", "dz"); putMapping(acipConsonant2wylie, "DZ", "dz");
acipConsonant2wylie.put("W", "w"); putMapping(acipConsonant2wylie, "W", "w");
acipConsonant2wylie.put("ZH", "zh"); putMapping(acipConsonant2wylie, "ZH", "zh");
acipConsonant2wylie.put("Z", "z"); putMapping(acipConsonant2wylie, "Z", "z");
acipConsonant2wylie.put("'", "'"); putMapping(acipConsonant2wylie, "'", "'");
acipConsonant2wylie.put("Y", "y"); putMapping(acipConsonant2wylie, "Y", "y");
acipConsonant2wylie.put("R", "r"); putMapping(acipConsonant2wylie, "R", "r");
acipConsonant2wylie.put("L", "l"); putMapping(acipConsonant2wylie, "L", "l");
acipConsonant2wylie.put("SH", "sh"); putMapping(acipConsonant2wylie, "SH", "sh");
acipConsonant2wylie.put("S", "s"); putMapping(acipConsonant2wylie, "S", "s");
acipConsonant2wylie.put("H", "h"); putMapping(acipConsonant2wylie, "H", "h");
acipConsonant2wylie.put("A", "a"); putMapping(acipConsonant2wylie, "A", "a");
acipConsonant2wylie.put("t", "T"); putMapping(acipConsonant2wylie, "t", "T");
acipConsonant2wylie.put("th", "Th"); putMapping(acipConsonant2wylie, "th", "Th");
acipConsonant2wylie.put("d", "D"); putMapping(acipConsonant2wylie, "d", "D");
acipConsonant2wylie.put("n", "N"); putMapping(acipConsonant2wylie, "n", "N");
acipConsonant2wylie.put("sh", "Sh"); putMapping(acipConsonant2wylie, "sh", "Sh");
} }
return (String)acipConsonant2wylie.get(acip); return (String)acipConsonant2wylie.get(acip);
} }
@ -207,14 +245,14 @@ class ACIPRules {
acipVowel2wylie = new HashMap(baseVowels.length * 4); acipVowel2wylie = new HashMap(baseVowels.length * 4);
for (int i = 0; i < baseVowels.length; i++) { for (int i = 0; i < baseVowels.length; i++) {
acipVowel2wylie.put(baseVowels[i][0], baseVowels[i][1]); putMapping(acipVowel2wylie, baseVowels[i][0], baseVowels[i][1]);
acipVowel2wylie.put('\'' + baseVowels[i][0], baseVowels[i][2]); putMapping(acipVowel2wylie, '\'' + baseVowels[i][0], baseVowels[i][2]);
acipVowel2wylie.put(baseVowels[i][0] + 'm', baseVowels[i][1] + 'M'); putMapping(acipVowel2wylie, baseVowels[i][0] + 'm', baseVowels[i][1] + 'M');
acipVowel2wylie.put('\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M'); putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M');
acipVowel2wylie.put(baseVowels[i][0] + ':', baseVowels[i][1] + 'H'); putMapping(acipVowel2wylie, baseVowels[i][0] + ':', baseVowels[i][1] + 'H');
acipVowel2wylie.put('\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H'); putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H');
acipVowel2wylie.put(baseVowels[i][0] + "m:", baseVowels[i][1] + "MH"); putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
acipVowel2wylie.put('\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH"); putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
} }
} }
return (String)acipVowel2wylie.get(acip); return (String)acipVowel2wylie.get(acip);
@ -228,27 +266,27 @@ class ACIPRules {
acipOther2wylie = new HashMap(20); acipOther2wylie = new HashMap(20);
// DLC FIXME: check all these again. // DLC FIXME: check all these again.
acipOther2wylie.put(",", "/"); putMapping(acipOther2wylie, ",", "/");
acipOther2wylie.put(" ", " "); putMapping(acipOther2wylie, " ", " ");
acipOther2wylie.put(".", "*"); putMapping(acipOther2wylie, ".", "*");
acipOther2wylie.put("|", "|"); putMapping(acipOther2wylie, "|", "|");
acipOther2wylie.put("`", "!"); putMapping(acipOther2wylie, "`", "!");
acipOther2wylie.put(";", ";"); putMapping(acipOther2wylie, ";", ";");
acipOther2wylie.put("*", "@"); putMapping(acipOther2wylie, "*", "@");
acipOther2wylie.put("#", "@#"); putMapping(acipOther2wylie, "#", "@#");
acipOther2wylie.put("%", "~X"); putMapping(acipOther2wylie, "%", "~X");
acipOther2wylie.put("&", "&"); putMapping(acipOther2wylie, "&", "&");
acipOther2wylie.put("0", "0"); putMapping(acipOther2wylie, "0", "0");
acipOther2wylie.put("1", "1"); putMapping(acipOther2wylie, "1", "1");
acipOther2wylie.put("2", "2"); putMapping(acipOther2wylie, "2", "2");
acipOther2wylie.put("3", "3"); putMapping(acipOther2wylie, "3", "3");
acipOther2wylie.put("4", "4"); putMapping(acipOther2wylie, "4", "4");
acipOther2wylie.put("5", "5"); putMapping(acipOther2wylie, "5", "5");
acipOther2wylie.put("6", "6"); putMapping(acipOther2wylie, "6", "6");
acipOther2wylie.put("7", "7"); putMapping(acipOther2wylie, "7", "7");
acipOther2wylie.put("8", "8"); putMapping(acipOther2wylie, "8", "8");
acipOther2wylie.put("9", "9"); putMapping(acipOther2wylie, "9", "9");
} }
return (String)acipOther2wylie.get(acip); return (String)acipOther2wylie.get(acip);
} }
@ -465,39 +503,52 @@ class ACIPRules {
/** Gets the duffcodes for vowel, such that they look good with /** Gets the duffcodes for vowel, such that they look good with
* the stack with hash key hashKey, and appends them to r. */ * the stack with hash key hashKey, and appends them to r. */
static void getDuffForACIPVowel(ArrayList r, DuffCode preceding, String vowel) { static void getDuffForACIPVowel(ArrayList duff, DuffCode preceding, String vowel) {
if (null == vowel) return; if (null == vowel) return;
if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion! Use assert. if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion! Use assert.
throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly."); throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly.");
// Order matters here. // Order matters here.
boolean context_added[] = new boolean[] { false };
if (vowel.startsWith("A")) { if (vowel.startsWith("A")) {
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.WYLIE_aVOWEL); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
} else if (vowel.indexOf("'U") >= 0) { } else if (vowel.indexOf("'U") >= 0) {
TibTextUtils.getVowel(r, preceding, "U"); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
} else if (vowel.indexOf("'I") >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
} else { } else {
if (vowel.indexOf('\'') >= 0) if (vowel.indexOf('\'') >= 0) {
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.A_VOWEL); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
if (vowel.indexOf("EE") >= 0) }
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.ai_VOWEL); if (vowel.indexOf("EE") >= 0) {
else if (vowel.indexOf('E') >= 0) TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.e_VOWEL); } else if (vowel.indexOf('E') >= 0) {
if (vowel.indexOf("OO") >= 0) TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.au_VOWEL); }
else if (vowel.indexOf('O') >= 0) if (vowel.indexOf("OO") >= 0) {
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.o_VOWEL); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
if (vowel.indexOf('I') >= 0) } else if (vowel.indexOf('O') >= 0) {
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.i_VOWEL); TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
if (vowel.indexOf('U') >= 0) }
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.u_VOWEL); if (vowel.indexOf('I') >= 0) {
if (vowel.indexOf('i') >= 0) TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.reverse_i_VOWEL); }
if (vowel.indexOf('U') >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
}
if (vowel.indexOf('i') >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
}
} }
// DLC FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
if (vowel.indexOf('m') >= 0) if (vowel.indexOf('m') >= 0) {
r.add(TibetanMachineWeb.getGlyph("M")); DuffCode last = (DuffCode)duff.get(duff.size() - 1);
duff.remove(duff.size() - 1);
TibTextUtils.getBindu(duff, last);
}
if (vowel.indexOf(':') >= 0) if (vowel.indexOf(':') >= 0)
r.add(TibetanMachineWeb.getGlyph("H")); duff.add(TibetanMachineWeb.getGlyph("H"));
} }
} }

View file

@ -1,3 +1,4 @@
// DLC NOW: KAsh ->Ksh here! optionally!
/* /*
The contents of this file are subject to the THDL Open Community License The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance

View file

@ -340,6 +340,22 @@ tstHelper("KA'", "[(K . A), (' . )]",
new String[] { }, new String[] { },
"{G+G}{YE}{S}"); "{G+G}{YE}{S}");
// DLC FIXME: warn about BDE vs. B+DE. color such differently. Maybe an inputter saw B+DE and typed in BDE, not thinking.
tstHelper("BDE", "{B}{DE}",
new String[] { "{B}{DE}", "{B+DE}" },
new String[] { "{B}{DE}" },
"{B}{DE}");
tstHelper("SHR'I", "{SH}{R'I}",
null,
null,
"{SH+R'I}");
// DLC FIXME: test EWTS {pouM}
// DLC FIXME: do TMW->ACIP->TMW->ACIP round-trip.
tstHelper("DRUG", "{D}{RU}{G}", tstHelper("DRUG", "{D}{RU}{G}",
new String[] { "{D}{RU}{G}", "{D+RU}{G}" }, new String[] { "{D}{RU}{G}", "{D+RU}{G}" },
new String[] { "{D+RU}{G}" }, new String[] { "{D+RU}{G}" },
@ -7302,6 +7318,7 @@ tstHelper("ZUR");
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
uhelp("*#HUm: K+DHA GRO`;.,", uhelp("*#HUm: K+DHA GRO`;.,",
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") K+DHA IS ESSENTIALLY NOTHING.]\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") K+DHA IS ESSENTIALLY NOTHING.]\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
// DLC FIXME: the file ACIP_SHRI should be made into an ACIP->TMW automated test case
} }
/** Tests some more tsheg bars, these from Dr. Lacey's critical /** Tests some more tsheg bars, these from Dr. Lacey's critical

View file

@ -197,32 +197,42 @@ class TParseTree {
* stack can take every prefix, which is not the case in * stack can take every prefix, which is not the case in
* reality */ * reality */
public TStackListList getUniqueParse(boolean noPrefixTests) { public TStackListList getUniqueParse(boolean noPrefixTests) {
TStackListList allLegalParses = new TStackListList(2); // save memory // For Sanskrit+Tibetan:
TStackListList allNonillegalParses = new TStackListList(2); // save memory
// For Tibetan only:
TStackListList allStrictlyLegalParses = new TStackListList(2); // save memory
TStackListList legalParsesWithVowelOnRoot = new TStackListList(1); TStackListList legalParsesWithVowelOnRoot = new TStackListList(1);
ParseIterator pi = getParseIterator(); ParseIterator pi = getParseIterator();
while (pi.hasNext()) { while (pi.hasNext()) {
TStackList sl = pi.next(); TStackList sl = pi.next();
BoolPair bpa = sl.isLegalTshegBar(noPrefixTests); BoolTriple bt = sl.isLegalTshegBar(noPrefixTests);
if (bpa.isLegal) { if (bt.isLegal) {
if (bpa.isLegalAndHasAVowelOnRoot) if (bt.isLegalAndHasAVowelOnRoot)
legalParsesWithVowelOnRoot.add(sl); legalParsesWithVowelOnRoot.add(sl);
allLegalParses.add(sl); if (!bt.isLegalButSanskrit)
allStrictlyLegalParses.add(sl);
allNonillegalParses.add(sl);
} }
} }
if (legalParsesWithVowelOnRoot.size() == 1) if (legalParsesWithVowelOnRoot.size() == 1)
return legalParsesWithVowelOnRoot; return legalParsesWithVowelOnRoot;
else { else {
if (allStrictlyLegalParses.size() == 1)
return allStrictlyLegalParses;
if (allStrictlyLegalParses.size() > 2)
throw new Error("can this happen?");
if (legalParsesWithVowelOnRoot.size() == 2) { if (legalParsesWithVowelOnRoot.size() == 2) {
if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size()) if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size())
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1)); throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1));
return new TStackListList(legalParsesWithVowelOnRoot.get(1)); return new TStackListList(legalParsesWithVowelOnRoot.get(1));
} }
if (allLegalParses.size() == 2) { if (allNonillegalParses.size() == 2) {
if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size()) if (allNonillegalParses.get(0).size() != 1 + allNonillegalParses.get(1).size())
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1)); throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allNonillegalParses.get(0) + " ;; " + allNonillegalParses.get(1));
return new TStackListList(allLegalParses.get(1)); return new TStackListList(allNonillegalParses.get(1));
} }
return allLegalParses; return allNonillegalParses;
} }
} }

View file

@ -121,16 +121,16 @@ class TStackList {
* happen. */ * happen. */
public ListIterator listIterator() { return al.listIterator(); } public ListIterator listIterator() { return al.listIterator(); }
/** Returns a pair with {@link BoolPair#isLegal} true if and only /** Returns a pair with {@link BoolTriple#isLegal} true if and
* if this list of stacks is a legal tsheg bar by the rules of * only if this list of stacks is a legal tsheg bar by the rules
* Tibetan syntax (sometimes called rules of spelling). If this * of Tibetan syntax (sometimes called rules of spelling). If
* is legal, then {@link BoolPair#isLegalAndHasAVowelOnRoot} will * this is legal, then {@link
* be true if and only if there is an explicit {A} vowel on the * BoolTriple#isLegalAndHasAVowelOnRoot} will be true if and only
* root stack. * if there is an explicit {A} vowel on the root stack.
* @param noPrefixTests true if you want to pretend that every * @param noPrefixTests true if you want to pretend that every
* stack can take every prefix, which is not the case in * stack can take every prefix, which is not the case in
* reality */ * reality */
public BoolPair isLegalTshegBar(boolean noPrefixTests) { public BoolTriple isLegalTshegBar(boolean noPrefixTests) {
// DLC handle PADMA and other Tibetanized Sanskrit fellows consistently. Right now we only treat single-stack Sanskrit guys as legal. // DLC handle PADMA and other Tibetanized Sanskrit fellows consistently. Right now we only treat single-stack Sanskrit guys as legal.
TTGCList tgcList = new TTGCList(this); TTGCList tgcList = new TTGCList(this);
@ -162,7 +162,9 @@ class TStackList {
} }
} }
} }
return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot); return new BoolTriple(isLegal,
(candidateType == "single-sanskrit-gc"),
isLegalAndHasAVowelOnRoot);
} }
private static final boolean ddebug = false; private static final boolean ddebug = false;
@ -232,11 +234,15 @@ class TStackList {
} }
/** Too simple to comment. */ /** Too simple to comment. */
class BoolPair { class BoolTriple {
boolean isLegal; boolean isLegal;
boolean isLegalButSanskrit; // some subset are legal but legal Sanskrit -- the single sanskrit stacks are this way, such as B+DE.
boolean isLegalAndHasAVowelOnRoot; boolean isLegalAndHasAVowelOnRoot;
BoolPair(boolean isLegal, boolean isLegalAndHasAVowelOnRoot) { BoolTriple(boolean isLegal,
boolean isLegalButSanskrit,
boolean isLegalAndHasAVowelOnRoot) {
this.isLegal = isLegal; this.isLegal = isLegal;
this.isLegalButSanskrit = isLegalButSanskrit;
this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot; this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot;
} }
} }