diff --git a/source/org/thdl/tib/input/DuffPane.java b/source/org/thdl/tib/input/DuffPane.java index 2e29a0e..5b86e9d 100644 --- a/source/org/thdl/tib/input/DuffPane.java +++ b/source/org/thdl/tib/input/DuffPane.java @@ -345,18 +345,24 @@ public class DuffPane extends TibetanPane implements FocusListener { * The keymap defines a default behavior for key presses * in both Tibetan and Roman mode. */ - private void setupKeymap() { - Action defaultAction = new AbstractAction() { - public void actionPerformed(ActionEvent e) { - DuffPane.this.performKeyStroke(e.getModifiers(), - e.getActionCommand()); - } - }; - createActionTable(this); - Keymap keymap = addKeymap("DuffBindings", getKeymap()); - keymap.setDefaultAction(defaultAction); - setKeymap(keymap); - } + private void setupKeymap() { + Action defaultAction = new AbstractAction() { + public void actionPerformed(ActionEvent e) { + try { + DuffPane.this.performKeyStroke(e.getModifiers(), + e.getActionCommand()); + } catch (Throwable t) { + System.err.println("JSKAD ERROR: " + t); + t.printStackTrace(System.err); + System.exit(1); + } + } + }; + createActionTable(this); + Keymap keymap = addKeymap("DuffBindings", getKeymap()); + keymap.setDefaultAction(defaultAction); + setKeymap(keymap); + } private void createActionTable(JTextComponent textComponent) { actions = new Hashtable(); @@ -746,78 +752,82 @@ public class DuffPane extends TibetanPane implements FocusListener { * * @param v the vowel (in Wylie) you want to insert */ - private void putVowel(String v) { - if (caret.getDot()==0) { - if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) - printAChenWithVowel(v); + private void putVowel(String v) { + if (caret.getDot()==0) { + if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) + printAChenWithVowel(v); - return; - } + return; + } - AttributeSet attr = getTibDoc().getCharacterElement(caret.getDot()-1).getAttributes(); - String fontName = StyleConstants.getFontFamily(attr); - int fontNum; + AttributeSet attr = 
getTibDoc().getCharacterElement(caret.getDot()-1).getAttributes(); + String fontName = StyleConstants.getFontFamily(attr); + int fontNum; - if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) { - try { - char c2 = getTibDoc().getText(caret.getDot()-1, 1).charAt(0); - int k = (int)c2; - if (k<32 || k>126) { //if previous character is formatting or some other non-character - if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) - printAChenWithVowel(v); + if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) { + try { + char c2 = getTibDoc().getText(caret.getDot()-1, 1).charAt(0); + int k = (int)c2; + if (k<32 || k>126) { //if previous character is formatting or some other non-character + if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) + printAChenWithVowel(v); - return; - } + return; + } - String wylie + String wylie = TibetanMachineWeb.getWylieForGlyph(fontNum, k, TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot); - if (TibetanMachineWeb.isWyliePunc(wylie)) { - if (charList.isEmpty() && !TibetanMachineWeb.isAChenRequiredBeforeVowel()) { - printAChenWithVowel(v); - return; - } - } + if (TibetanMachineWeb.isWyliePunc(wylie)) { + if (charList.isEmpty() && !TibetanMachineWeb.isAChenRequiredBeforeVowel()) { + printAChenWithVowel(v); + return; + } + } - DuffCode dc_1 = null; - DuffCode dc_2 = new DuffCode(fontNum, c2); + DuffCode dc_1 = null; + DuffCode dc_2 = new DuffCode(fontNum, c2); - if (caret.getDot() >= 2) { - attr = getTibDoc().getCharacterElement(caret.getDot()-2).getAttributes(); - fontName = StyleConstants.getFontFamily(attr); - if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) { - c2 = getTibDoc().getText(caret.getDot()-2, 1).charAt(0); - dc_1 = new DuffCode(fontNum, c2); - } - } + if (caret.getDot() >= 2) { + attr = getTibDoc().getCharacterElement(caret.getDot()-2).getAttributes(); + fontName = StyleConstants.getFontFamily(attr); + if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) { + 
c2 = getTibDoc().getText(caret.getDot()-2, 1).charAt(0); + dc_1 = new DuffCode(fontNum, c2); + } + } - java.util.List before_vowel = new ArrayList(); - if (null != dc_1) - before_vowel.add(dc_1); + java.util.List before_vowel = new ArrayList(); + if (null != dc_1) + before_vowel.add(dc_1); - before_vowel.add(dc_2); - java.util.List after_vowel = new ArrayList(); - TibTextUtils.getVowel(after_vowel, dc_1, dc_2, v); + before_vowel.add(dc_2); + java.util.List after_vowel = new ArrayList(); + try { + TibTextUtils.getVowel(after_vowel, dc_1, dc_2, v); + } catch (IllegalArgumentException e) { + // drop this vowel silently. + } if (after_vowel.size() >= before_vowel.size()) { setNumberOfGlyphsForLastVowel(after_vowel.size() - before_vowel.size()); } else { setNumberOfGlyphsForLastVowel(0); - ThdlDebug.noteIffyCode(); // I don't think this can ever happen, but... + // can happen for pou (as opposed to puo) (FIXME) } - redrawGlyphs(before_vowel, after_vowel); - } - catch(BadLocationException ble) { - System.out.println("no--can't insert here"); + redrawGlyphs(before_vowel, after_vowel); + } + catch(BadLocationException ble) { + System.out.println("no--can't insert here"); ThdlDebug.noteIffyCode(); - } - } - else { //0 font means not Tibetan font, so begin new Tibetan font section - if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) - printAChenWithVowel(v); - } - } + } + } + else { //0 font means not Tibetan font, so begin new Tibetan font section + if (!TibetanMachineWeb.isAChenRequiredBeforeVowel()) + printAChenWithVowel(v); + } + } /** @@ -840,14 +850,18 @@ public class DuffPane extends TibetanPane implements FocusListener { * * @param v the vowel (in Wylie) which you want to print with ACHEN */ - private void printAChenWithVowel(String v) { - DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN); - DuffCode dc = dc_array[TibetanMachineWeb.TMW]; - java.util.List achenlist = new ArrayList(); - TibTextUtils.getVowel(achenlist, dc, 
v); - DuffData[] dd = TibTextUtils.convertGlyphs(achenlist); - getTibDoc().insertDuff(caret.getDot(), dd); - } + private void printAChenWithVowel(String v) { + DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN); + DuffCode dc = dc_array[TibetanMachineWeb.TMW]; + java.util.List achenlist = new ArrayList(); + try { + TibTextUtils.getVowel(achenlist, dc, v); + } catch (IllegalArgumentException e) { + // drop this vowel silently. + } + DuffData[] dd = TibTextUtils.convertGlyphs(achenlist); + getTibDoc().insertDuff(caret.getDot(), dd); + } /** * Puts a bindu/anusvara at the current caret position. @@ -884,7 +898,8 @@ public class DuffPane extends TibetanPane implements FocusListener { DuffCode dc = new DuffCode(fontNum, c2); java.util.List beforecaret = new ArrayList(); beforecaret.add(dc); - java.util.List bindulist = TibTextUtils.getBindu(dc); + java.util.List bindulist = new LinkedList(); + TibTextUtils.getBindu(bindulist, dc); redrawGlyphs(beforecaret, bindulist); initKeyboard(); return; @@ -895,7 +910,9 @@ public class DuffPane extends TibetanPane implements FocusListener { } } - DuffData[] dd = TibTextUtils.convertGlyphs(TibTextUtils.getBindu(null)); + java.util.List binduList = new LinkedList(); + TibTextUtils.getBindu(binduList, null); + DuffData[] dd = TibTextUtils.convertGlyphs(binduList); getTibDoc().insertDuff(caret.getDot(), dd); initKeyboard(); } diff --git a/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected b/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected index 8ca0166..abeab32 100644 --- a/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected +++ b/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected @@ -37,13 +37,13 @@ rgyal ba kun dngos mtsho skyes rdo rje bstan pa'i rtsa lag thams cad mkhyen pa z bka' drin gzugs can dbyig 'dzin lto 'dir shong 'gyur min na kun mkhyen srang las gang gis gzhal//\par \par 
li khri'i lcug phran mkhyen pa'i snang ba can//\par -'jam mgon bloX. yi lang tsho bazaX.nga po'i tshon//\par +'jam mgon bloX yi lang tsho bazaX.nga po'i tshon//\par kha dog so sor bkra ba'i gragaX.sa paX.'i rgyan//\par phyogs bral rna lung 'god mkhas rtag tu rgyal//\f2\fs44\i0\b0\ul0\cf0\par \par \f1\fs28\i0\b0\ul0 dpal ldan chos kyi rang bzhin ngos yangs par//\par gnas lnga'i bang mdzod 'byor par lhun grub pa'i//\par -mnga' sgyur bgrod byed ban+d+hu dziA wa ka/\par +mnga' sgyur bgrod byed ban+d+hu dzI wa ka/\par rmad byung cod pan mchog tu rnam par bsngags//\par \par rgya hor lung dang rigs kyis mngon mtho zhing //\par @@ -77,7 +77,7 @@ deb ther rdzogs ldan gsar pa'i dga' ston mgron//\par \f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de yang ston pa 'di nyid kyi gdung la nyi ma'i gnyen dang bu ram shing pa shAkya zhes brjod pa ni/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 yul gru 'dzin du rgyal po brgya tham pa byung ba'i mtha' ma rna ba can gyi bu gau ta ma dang b+ha ra d+h+wa dza gnyis las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sngon mas rab tu byung ste lo ma'i spyil po yid du 'ong ba'i bsam gtan gyi khang bur gnas pa'i tshe/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 g.yo can pad+ma'i rtsa lag dang bzang mo dga' mgur spyod pa'i dus las yol bas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bzang mo bas.d pa'i ral gri khrag can lo ma'i spyil po'i nye 'dabs su bor ba'i rkyen gyis gau ta ma bsal shing la bskyon pa'i dus/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drang srong mdog nag gi rdzu 'phrul gyis gru char gyi thigs pa dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dri bzhon lus la reg pa'i rkyen gyis 'dod pa'i gnas rjes su dran pa'i yid kyi shing rtas drangs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 khams kyi dwangs ma gnyis lhan cig tu lhung ba las sgo nga gnyis su gyur pa nyi 'od kyis bskyangs shing khye'u gnyis bu ram gyi shing gseb tu zhugs pas ming de ltar du grags la/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bu ram shing pa'i brgyud la rgyal po brgya byung 
ba'i mtha' ma 'phags skyes po/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'phags skyes po la sras bzhi byung yang btsun mo 'das te slar stobs ldan gyi rgyal po gzhan zhig gi bu mo khab tu blangs par sras byung na rgyal srid du dbang bskur bar khas blangs pas dam bcas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sras sngon ma rnams spyugs pa rang rang gi sring mo dang bcas te chu bo skal ldan shing rta'i 'gram du ming sring lhan cig tu 'dus pa las shAkya'i brgyud ces grags pa yin no//\par \f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de yang kun mkhyen nyi ma'i gnyen 'di nyid 'khrungs lor mkhas grub rnams kyi bzhed tshul mang du mchis kyang don grub ces pa sa lug dbyar zla 'bring po'i chu stod kyi nya ba'i tshes bco lnga'i dus su/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 yum gso sbyong la gnas pa'i lhums su glang po che'i phrug gu thal dkar gyi rnam par gyur nas zhugs/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 zla ba bcu phal cher lon pa legs par sbyar ba lha'i skad du ru dra zhes pa bsil ldan pa rnams drag por brjod pa/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya nag pa dbang thang dang bstun pa'i ming ging shing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bod yul du lcags pho spre'ura 'bod pa'i dpyid tha sa gas nya ba me tog can gyi zla ba/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya nag pa si yol du brjod pa'i yar ngo'i tshes bdun gyi res gza' 'od zer bdun pa dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 las skar rgyal la babs pa na lum+bi ni'i nags su sku bltams/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de nas lang tsho'i kun+da rnam par rgyas pa'i zil mngar gyi sgyu rtsal drug cu rtsa bzhi'i yal 'dab kyi yon tan kun la rang gir bgyis shing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 pad+mo'i snyems pa thogs pa'i ri dwags mig can gyi don yongs su tshang ba'i grags 'dzin ma dang sa mtsho ma sogs btsun mor dbang bskur te lo nyi shu rtsa dgu'i bar du nyi 'og gyi rgyal phran gnyis 'thung gi spyi bor 'jigs pa med pa'i rgyal thabs kyi bya ba'i skad gsang mthon pos rang byan 
du 'gying bar dbang thob na'ang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mchog gi sprul sku'i mdzad pa ston pa'i ched du/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dgung lo nyer dgu pa kun 'dzin zhes pa sa pho byi ba la rang byung gi sgo nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rab byung gi dngos po yongs su rdzogs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 lo drug tu dka' ba spyad pas lang tsho phun sum tshogs pa las gzhan du gyur pa na/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dga' mo dang dga' stobs mas ba stong gi 'o ma lan bcu drug gi bar du nying khur byas pa'i 'o thug sbrang rtsi dang sbyar ba'i kun tu bzang po'i mchod sprin gyis sku su war+Na'i mchod sdong nyi gzhon 'bum gyis 'khyud pa ltar gyur te rdo rje gdan du byang chub kyi shing drung na 'chi med bdag pos rtswa 'tshong gyi rnam par sprul te phul ba'i rtswa'i gdan la bzhugs nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 chu srin rgyal mtshan can sde dang bcas pa'i g.yul las rnam par rgyal ba'i ba dan nam mkha'i mtha' klas par bsgrengs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dgung lo so lnga pa rgyal ba zhes pa shing rta'i sa gas nya ba'i bco lnga'i skya rengs shar ba na ye shes mchog gi bdud rtsi mngon du mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /mdo las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de'i nyin mo zla ba sgra gcan gyis bzung ba dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sgra gcan 'dzin dang bdud rtsi zas kyi khye'u zhig kyang skyes par bshad pa'i gza' 'dzin gyi ri mo ni gza' gnas su gcig chu tshod so brgyad zla skar gyi skar gnas su bcu drug chu tshod stong pa sgra gcan gdong gi skar mar bcu drug chu tshod nyer dgu byung bas gza' 'dzin gyi ri mo ni tshad ma'i aA dar+sha gtsang ma'i ngos su gsal bar shar ba yin no/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par \f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de nas zhag zhe dgu'i bar du dam pa'i chos kyi bdud rtsi'i sgo brgya ma phye ba'i tshul bstan pas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'jig rten gyi byed po gser 
mngal can gyis skal pa mchog gi bzang po'i dpal dang ldan pa'i don du gsol ba la brten nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mchog dman gyi gdul bya so so'i blo dang 'tsham par zab rgyas chos kyi sgo glegs rnam par bkral nas theg pa che chung gi rigs can gyi yol go/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rang 'dod pa'i tshogs mtha' dag gcig tu 'jo ba'i ro zad mi shes pa mkha' khyab tu bro ba'i 'khor lo bskor te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mthar gyad kyi yul rtswa mchog gi grong du dgung lo gya gcig pa dpa' bo zhes pa lcags 'brug gi lor mi 'gyur ba'i bde ba chen po chos kyi sku'i mkha' klong du sna tshogs pa'i sprul sku'i zlos gar gyis rol pa'i chu 'dzin gzugs sku tha mi dad pa'i mdzad pa bstan to/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par -\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar bstan pa rin po che yul dbus 'gyur nas char dus kyi dal 'gro'i klung rang babs su 'gyur ba'i gshis lugs bzhin tha grur khyab pa las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'gro ba'i bla ma shAkya seng ge gya gcig pa lcags 'brug lo nag nyar shriA d+hA n+ya ka Ta kar gsang ba kun gyi gan mdzod rnam par 'dzin pa'i mi'i dbang phyug zla ba bzang por sho lo ka stong phrag bcu gnyis kyi bdag nyid can gyi dpal dang po'i sangs rgyas dus kyi 'khor lo'i rgyud gsungs pa sham+b+ha lar spyan drangs nas ma la ya'i skyed tshal du sku gsungs thugs yongs su rdzogs pa'i dkyil 'khor gyi snang brnyan ye shes kyi gzi byin mngon par 'bar ba'i sgyu ma'i gar gyis rtsen pa'i 'dren byed kyi dga' ston gsar pa bskrun te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rdo rje theg pa'i bgo skal ka lA pa'i grong khyer gyi skye rgur 'gyed pa'i mu mtha' bral ba'i mdzad pa nas bzung 'jigs pa med pa'i gdong lnga'i khri la bgrod byed rtsibs stong 'khor lo'i ri mo mchog tu bkra ba'i chos rgyal rigs ldan sum cu rtsa gsum du byon pa mtha' dag gis phyi nang gzhan gsum sbyor ba'i gsang chen nA da'i sgra dbyangs kyi nor bu'i 'phyang 'phrul las ldan gyi rna bar spud par mdzad pa'i tha 
ma/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drag po'i 'khor lo can gyis dgung lo dgu bcu rtsa brgyad par kla klo'i dpung tshogs mtha' dag ming gi lhag mar byas nas snga na med pas sa chen po'i khor yug kun tu khyab par mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /de yang rigs ldan drag po dgung lo lnga bcu'i steng du kla klo bcom par bzhed pa ltar na/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 kla klo'i gnas tshad stong dang brgyad brgyar bshad pa las lo grangs bzhi bcu rtsa bdun tsam mi 'grig pa'i skyon yod cing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de dag rags rtsis su 'chad na zhib rtsis ni 'byung ba'i skabs med pas gzur gnas kyi shes ldan rnams soms shig\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar sham b+ha las mtshon te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 ao rgyan/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bal yul/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya hor sogs su bstan pa byung tshul ni mtha' yas kyang mang gis dogs pas ma spros so/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par +\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar bstan pa rin po che yul dbus 'gyur nas char dus kyi dal 'gro'i klung rang babs su 'gyur ba'i gshis lugs bzhin tha grur khyab pa las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'gro ba'i bla ma shAkya seng ge gya gcig pa lcags 'brug lo nag nyar shrI d+hA n+ya ka Ta kar gsang ba kun gyi gan mdzod rnam par 'dzin pa'i mi'i dbang phyug zla ba bzang por sho lo ka stong phrag bcu gnyis kyi bdag nyid can gyi dpal dang po'i sangs rgyas dus kyi 'khor lo'i rgyud gsungs pa sham+b+ha lar spyan drangs nas ma la ya'i skyed tshal du sku gsungs thugs yongs su rdzogs pa'i dkyil 'khor gyi snang brnyan ye shes kyi gzi byin mngon par 'bar ba'i sgyu ma'i gar gyis rtsen pa'i 'dren byed kyi dga' ston gsar pa bskrun te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rdo rje theg pa'i bgo skal ka lA pa'i grong khyer gyi skye rgur 'gyed pa'i mu mtha' bral ba'i mdzad pa nas bzung 'jigs pa med pa'i gdong lnga'i khri la bgrod byed rtsibs stong 
'khor lo'i ri mo mchog tu bkra ba'i chos rgyal rigs ldan sum cu rtsa gsum du byon pa mtha' dag gis phyi nang gzhan gsum sbyor ba'i gsang chen nA da'i sgra dbyangs kyi nor bu'i 'phyang 'phrul las ldan gyi rna bar spud par mdzad pa'i tha ma/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drag po'i 'khor lo can gyis dgung lo dgu bcu rtsa brgyad par kla klo'i dpung tshogs mtha' dag ming gi lhag mar byas nas snga na med pas sa chen po'i khor yug kun tu khyab par mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /de yang rigs ldan drag po dgung lo lnga bcu'i steng du kla klo bcom par bzhed pa ltar na/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 kla klo'i gnas tshad stong dang brgyad brgyar bshad pa las lo grangs bzhi bcu rtsa bdun tsam mi 'grig pa'i skyon yod cing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de dag rags rtsis su 'chad na zhib rtsis ni 'byung ba'i skabs med pas gzur gnas kyi shes ldan rnams soms shig\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar sham b+ha las mtshon te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 ao rgyan/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bal yul/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya hor sogs su bstan pa byung tshul ni mtha' yas kyang mang gis dogs pas ma spros so/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par \f4\fs24\i0\b0\ul0\cf0\par \pard\ql\f1\fs28\i0\b0\ul0\par \fs24\par diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java index 7614027..8102fad 100644 --- a/source/org/thdl/tib/input/TibetanConverter.java +++ b/source/org/thdl/tib/input/TibetanConverter.java @@ -171,10 +171,10 @@ public class TibetanConverter implements FontConverterConstants { out.println(""); out.println(""); out.println(" In --to... and --acip-to... 
modes, needs one argument, the name of the"); - out.println(" TibetanMachineWeb RTF"); - out.println(" file (for --to-wylie, --to-unicode, and --to-tibetan-machine) or the name of"); + out.println(" TibetanMachineWeb RTF file (for --to-wylie, --to-wylie-text, --to-acip-text,"); + out.println(" --to-acip, --to-unicode, and --to-tibetan-machine) or the name of"); out.println(" the TibetanMachine RTF file (for --to-tibetan-machine-web) or the name of the"); - out.println(" ACIP text file (for --acip-to-unicode). Writes the"); + out.println(" ACIP text file (for --acip-to-unicode or --acip-to-tmw). Writes the"); out.println(" result to standard output (after dealing with the curly brace problem if"); out.println(" the input is TibetanMachineWeb). Exit code is zero on success, 42 if some"); out.println(" glyphs couldn't be converted (in which case the output is just those glyphs),"); @@ -364,14 +364,14 @@ public class TibetanConverter implements FontConverterConstants { + ((TMW_TO_WYLIE_TEXT == ct) ? 
1 : 0) == 1); long numAttemptedReplacements[] = new long[] { 0 }; - if (TMW_TO_WYLIE == ct) { + if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) { // Convert to THDL Wylie: if (!tdoc.toWylie(0, tdoc.getLength(), numAttemptedReplacements)) { exitCode = 44; } - } else if (TMW_TO_ACIP == ct) { + } else if (TMW_TO_ACIP == ct || TMW_TO_ACIP_TEXT == ct) { // Convert to ACIP: if (!tdoc.toACIP(0, tdoc.getLength(), @@ -411,7 +411,10 @@ public class TibetanConverter implements FontConverterConstants { // Write to standard output the result: if (TMW_TO_WYLIE_TEXT == ct || TMW_TO_ACIP_TEXT == ct) { try { - tdoc.writeTextOutput(new BufferedWriter(new OutputStreamWriter(out))); + BufferedWriter bw + = new BufferedWriter(new OutputStreamWriter(out)); + tdoc.writeTextOutput(bw); + bw.flush(); } catch (IOException e) { exitCode = 40; } diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java index 1d7571f..3584c91 100644 --- a/source/org/thdl/tib/scanner/Manipulate.java +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -44,11 +44,18 @@ public class Manipulate return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; } - /** Returns null on error. */ public static String wylieToAcip(String palabra) { // DLC FIXME: for unknown things, return null. - if (palabra.equals("@#")) return "*"; + if (palabra.equals("@#")) return "#"; + if (palabra.equals("!")) return "`"; + if (palabra.equals("b+h")) return "BH"; + if (palabra.equals("d+h")) return "DH"; + if (palabra.equals("X")) return null; + if (palabra.equals("iA")) return null; + if (palabra.equals("ai")) return "EE"; + if (palabra.equals("au")) return "OO"; + if (palabra.equals("$")) return null; if (palabra.startsWith("@") || palabra.startsWith("#")) return null; // we can't convert this in isolation! We need context. 
char []caract; @@ -93,7 +100,7 @@ public class Manipulate nuevaPalabra = replace(nuevaPalabra, "u", "'U"); nuevaPalabra = replace(nuevaPalabra, "-I", "i"); nuevaPalabra = replace(nuevaPalabra, "/", ","); - nuevaPalabra = replace(nuevaPalabra, "_", " "); + nuevaPalabra = replace(nuevaPalabra, "_", " "); nuevaPalabra = fixWazur(nuevaPalabra); return nuevaPalabra; } diff --git a/source/org/thdl/tib/text/DuffCode.java b/source/org/thdl/tib/text/DuffCode.java index 4e11258..01db664 100644 --- a/source/org/thdl/tib/text/DuffCode.java +++ b/source/org/thdl/tib/text/DuffCode.java @@ -168,7 +168,7 @@ public final class DuffCode { * recursion (manifesting as a StackOverflowError)) */ public String toString(boolean TMW) { boolean[] err = new boolean[] { false }; - return "Wylie wouldn't produce o'i for an input file containing merely TMW9.61 -- it would produce \u0f7c,\u0f60,\u0f72 -- round-trip shows why. - if (null != dc_v) - l.add(dc_v); + if (vowel.equals(e_VOWEL)) { + String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); + DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e); + if (null == dc_v && null != context_1) { + hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1); + dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e); + } - return; - } + if (!context_added[0]) { + context_added[0] = true; + if (context_1 != null) + l.add(context_1); - if (vowel.equals(e_VOWEL)) { - String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); - DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e); - if (null == dc_v && null != context_1) { - hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1); - dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e); - } + l.add(context_2); + } + + if (null != dc_v) + l.add(dc_v); + else throw new IllegalArgumentException("dropping vowels is bad"); - if (context_1 != null) 
- l.add(context_1); + return; + } - l.add(context_2); + if (vowel.equals(o_VOWEL)) { + String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); + DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o); + if (null == dc_v && null != context_1) { + hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1); + dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o); + } - if (null != dc_v) - l.add(dc_v); + if (!context_added[0]) { + context_added[0] = true; + if (context_1 != null) + l.add(context_1); - return; - } + l.add(context_2); + } - if (vowel.equals(o_VOWEL)) { - String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); - DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o); - if (null == dc_v && null != context_1) { - hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1); - dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o); - } + if (null != dc_v) + l.add(dc_v); + else throw new IllegalArgumentException("dropping vowels is bad"); - if (context_1 != null) - l.add(context_1); + return; + } - l.add(context_2); + //next come the vowels u, A, and U + //these three vowels are grouped together because they all + //can cause the preceding context to change. 
in particular, + //both u and A cannot be affixed to ordinary k or g, but + //rather the shortened versions of k and g - therefore, - if (null != dc_v) - l.add(dc_v); + if (vowel.equals(u_VOWEL)) { + String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); + DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context); + DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u); - return; - } + if (!context_added[0]) { + context_added[0] = true; + if (null != context_1) + l.add(context_1); -//next come the vowels u, A, and U -//these three vowels are grouped together because they all -//can cause the preceding context to change. in particular, -//both u and A cannot be affixed to ordinary k or g, but -//rather the shortened versions of k and g - therefore, + if (null == halfHeight) + l.add(context_2); + else + l.add(halfHeight); + } + + if (null != dc_v) + l.add(dc_v); + else throw new IllegalArgumentException("dropping vowels is bad"); - if (vowel.equals(u_VOWEL)) { - String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); - DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context); - DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u); + return; + } - if (null != context_1) - l.add(context_1); + if (vowel.equals(A_VOWEL)) { + String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); + DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context); + DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A); - if (null == halfHeight) - l.add(context_2); - else - l.add(halfHeight); + if (!context_added[0]) { + context_added[0] = true; + if (null != context_1) + l.add(context_1); - if (null != dc_v) - l.add(dc_v); + if (null == halfHeight) + l.add(context_2); + else + l.add(halfHeight); + } + + if (null != dc_v) + l.add(dc_v); + else throw new IllegalArgumentException("dropping 
vowels is bad"); - return; - } + return; + } - if (vowel.equals(A_VOWEL)) { - String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); - DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context); - DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A); + if (vowel.equals(U_VOWEL)) { + String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2); + DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context); + DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U); - if (null != context_1) - l.add(context_1); + if (!context_added[0]) { + context_added[0] = true; + if (null != context_1) + l.add(context_1); - if (null == halfHeight) - l.add(context_2); - else - l.add(halfHeight); + if (null == halfHeight) + l.add(context_2); + else + l.add(halfHeight); + } + + if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2)) + l.add(dc_v); + else throw new IllegalArgumentException("dropping vowels is bad"); - if (null != dc_v) + return; + } - l.add(dc_v); + //finally, the vowels I and ACIP and TMW->EWTS. Test it. When it + // does, revamp TGCPair to have a set of vowels. The + // output order should be consistent with the + // Unicode-imposed order on vowels. } else { // number or weird thing: @@ -1134,12 +1202,6 @@ public class TibTextUtils implements THDLWylieConstants { if (isAppendageNonVowelWylie(wylie)) { candidateType = candidateType.substring("maybe-".length()).intern(); - // So that we get 'am, not 'm; 'ang, not 'ng: - - // FIXME: cludge: weird place to do this. - // pa'am, not pa'm is what we want, sure, - // but doing this here is ugly. 
- tp.setWylie(WYLIE_aVOWEL + tp.getWylie()); } else { if (null != warnings) warnings.append("Found a tsheg bar that has an achung (" + ACHUNG + ") tacked on, followed by some other thing whose wylie is " + wylie + "\n"); @@ -1264,7 +1326,7 @@ public class TibTextUtils implements THDLWylieConstants { if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie) || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) { translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie)); - } else { + } else if (i + 1 < sz) { if (TGCPair.CONSONANTAL_WITH_VOWEL != cls && TGCPair.SANSKRIT_WITH_VOWEL != cls) translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-'); @@ -1277,7 +1339,8 @@ public class TibTextUtils implements THDLWylieConstants { int leftover = sz + 1; // Appendaged vs. not appendaged? it affects nothing at - // this stage. + // this stage except for pa'm vs. pa'am. + boolean appendaged = (candidateType.startsWith("appendaged-")); candidateType = getCandidateTypeModuloAppendage(candidateType); if ("prefix/root-root/suffix-suffix/postsuffix" == candidateType) { @@ -1433,7 +1496,9 @@ public class TibTextUtils implements THDLWylieConstants { // append the wylie/ACIP left over: for (int i = leftover; i < sz; i++) { TGCPair tp = (TGCPair)gcs.get(i); - translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP()); + translitBuffer.append(EWTSNotACIP + ? tp.getWylie(appendaged) + : tp.getACIP(appendaged)); } } } @@ -1468,6 +1533,7 @@ public class TibTextUtils implements THDLWylieConstants { ArrayList glyphList = new ArrayList(); StringBuffer translitBuffer = new StringBuffer(); + // DLC FIXME: " " should become " ", and test with ACIP # and *. 
for (int i=0; i 0 && dcs[i - 1].getCharacter() == '\r') + translitBuffer.append("\r\n"); + else + translitBuffer.append(ch); + } translitBuffer.append(ch); } else { String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch); diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index 87f3f77..568e78d 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -994,6 +994,8 @@ private static boolean isAmbHelper(String y) { * @return true if x + y is ambiguous in the Extended Wylie * transliteration, false if not */ public static boolean isAmbiguousWylie(String x, String y) { + // DLC NOW: BDE vs. B+DE -- TMW->ACIP should give B+DE to be very friendly to machines. + // What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.? // Some would say it doesn't matter, because that's illegal. wa // doesn't take any prefixes. But I want even illegal stuff to @@ -1719,19 +1721,21 @@ private static String acipForGlyph(String hashKey) { * documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change * them both when you change this. */ private static String getTMWToWylieErrorString(DuffCode dc) { - return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode " + return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert " + dc.toString(true) - + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; + + " to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>"; } /** Error that appears in a document when some TMW cannot be * transcribed in ACIP. This error message is * documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET), so change * them both when you change this. */ +static String getTMWToACIPErrorString(String it) { + return "[# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert " + it + " to ACIP. 
Please transcribe this yourself.]"; +} + private static String getTMWToACIPErrorString(DuffCode dc) { - return "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode " - + dc.toString(true) - + " to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; + return getTMWToACIPErrorString(dc.toString(true)); } /** diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index 18a0b1c..f7bb77b 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -446,12 +446,18 @@ public class ACIPConverter { if (!lastGuyWasNonPunct || (null != lastGuy && (lpl = lastGuy.get(lastGuy.size() - 1)).size() == 1 - && lpl.get(0).getLeft().equals("G") - && // it's (G . anything) - // followed by some number - // of spaces (at least one, - // this one) and then a - // comma: + // "GU ," and "KU ," each have + // tshegs, but "GI ," and "KI + // ," each have a Tibetan + // space. + && ((lpl.get(0).getLeft().equals("G") + || lpl.get(0).getLeft().equals("K")) + && (lpl.get(0).getRight().indexOf('U') < 0)) + && + // it's (G . anything) + // followed by some number of + // spaces (at least one, this + // one) and then a comma: peekaheadFindsSpacesAndComma(scan, i+1))) { if (null != writer) { unicode = " "; diff --git a/source/org/thdl/tib/text/ttt/ACIPRules.java b/source/org/thdl/tib/text/ttt/ACIPRules.java index 78d8577..81486ec 100644 --- a/source/org/thdl/tib/text/ttt/ACIPRules.java +++ b/source/org/thdl/tib/text/ttt/ACIPRules.java @@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt; import java.util.HashSet; import java.util.ArrayList; import java.util.HashMap; +import java.util.StringTokenizer; import java.util.List; import org.thdl.tib.text.DuffCode; @@ -30,7 +31,7 @@ import org.thdl.tib.text.TibTextUtils; /** Canonizes some facts regarding the ACIP transcription system. 
* @author David Chandler */ -class ACIPRules { +public class ACIPRules { /** {Ksh}, the longest consonant, has 3 characters, so this is * three. */ public static int MAX_CONSONANT_LENGTH = 3; @@ -66,7 +67,7 @@ class ACIPRules { // DLC I'm on my own with 'O and 'E and 'OO and 'EE, but // GANG'O appears and I wonder... so here they are. It's // consistent with 'I and 'A and 'U, at least: all the vowels - // may appear as K'vowel. + // may appear as K'vowel. DLC FIXME: ask. acipVowels.add(baseVowels[i][0]); acipVowels.add('\'' + baseVowels[i][0]); @@ -140,6 +141,43 @@ class ACIPRules { return consonants.contains(acip); } + private static HashMap wylieToACIP = null; + /** Returns the ACIP transliteration corresponding to the THDL + Extended Wylie atom EWTS, or null if EWTS is not + recognized. */ + public static String getACIPForEWTS(String EWTS) { + getWylieForACIPConsonant(null); + getWylieForACIPOther(null); + getWylieForACIPVowel(null); + String ans = (String)wylieToACIP.get(EWTS); + if (null == ans) { + StringBuffer finalAns = new StringBuffer(EWTS.length()); + StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true); + while (sTok.hasMoreTokens()) { + String part, tok = sTok.nextToken(); + if (tok.equals("-") || tok.equals("+")) + part = tok; + else + part = (String)wylieToACIP.get(tok); + if (null == part) return null; + finalAns.append(part); + } + return finalAns.toString(); + } + return ans; + } + + /** Registers acip->wylie mappings in toWylie; registers + wylie->acip mappings in {@link #wylieToACIP}. */ + private static void putMapping(HashMap toWylie, String ACIP, String EWTS) { + toWylie.put(ACIP, EWTS); + if (null == wylieToACIP) { + wylieToACIP = new HashMap(75); + wylieToACIP.put("_", " "); // oddball. + } + wylieToACIP.put(EWTS, ACIP); + } + private static HashMap acipConsonant2wylie = null; /** Returns the EWTS corresponding to the given ACIP consonant * (without the "A" vowel). 
Returns null if there is no such @@ -149,52 +187,52 @@ class ACIPRules { acipConsonant2wylie = new HashMap(37); // oddball: - acipConsonant2wylie.put("V", "w"); + putMapping(acipConsonant2wylie, "V", "w"); // more oddballs: - acipConsonant2wylie.put("DH", "d+h"); - acipConsonant2wylie.put("BH", "b+h"); - acipConsonant2wylie.put("dH", "D+h"); - acipConsonant2wylie.put("DZH", "dz+h"); - acipConsonant2wylie.put("Ksh", "k+Sh"); - acipConsonant2wylie.put("GH", "g+h"); + putMapping(acipConsonant2wylie, "DH", "d+h"); + putMapping(acipConsonant2wylie, "BH", "b+h"); + putMapping(acipConsonant2wylie, "dH", "D+h"); + putMapping(acipConsonant2wylie, "DZH", "dz+h"); + putMapping(acipConsonant2wylie, "Ksh", "k+Sh"); + putMapping(acipConsonant2wylie, "GH", "g+h"); - acipConsonant2wylie.put("K", "k"); - acipConsonant2wylie.put("KH", "kh"); - acipConsonant2wylie.put("G", "g"); - acipConsonant2wylie.put("NG", "ng"); - acipConsonant2wylie.put("C", "c"); - acipConsonant2wylie.put("CH", "ch"); - acipConsonant2wylie.put("J", "j"); - acipConsonant2wylie.put("NY", "ny"); - acipConsonant2wylie.put("T", "t"); - acipConsonant2wylie.put("TH", "th"); - acipConsonant2wylie.put("D", "d"); - acipConsonant2wylie.put("N", "n"); - acipConsonant2wylie.put("P", "p"); - acipConsonant2wylie.put("PH", "ph"); - acipConsonant2wylie.put("B", "b"); - acipConsonant2wylie.put("M", "m"); - acipConsonant2wylie.put("TZ", "ts"); - acipConsonant2wylie.put("TS", "tsh"); - acipConsonant2wylie.put("DZ", "dz"); - acipConsonant2wylie.put("W", "w"); - acipConsonant2wylie.put("ZH", "zh"); - acipConsonant2wylie.put("Z", "z"); - acipConsonant2wylie.put("'", "'"); - acipConsonant2wylie.put("Y", "y"); - acipConsonant2wylie.put("R", "r"); - acipConsonant2wylie.put("L", "l"); - acipConsonant2wylie.put("SH", "sh"); - acipConsonant2wylie.put("S", "s"); - acipConsonant2wylie.put("H", "h"); - acipConsonant2wylie.put("A", "a"); - acipConsonant2wylie.put("t", "T"); - acipConsonant2wylie.put("th", "Th"); - acipConsonant2wylie.put("d", 
"D"); - acipConsonant2wylie.put("n", "N"); - acipConsonant2wylie.put("sh", "Sh"); + putMapping(acipConsonant2wylie, "K", "k"); + putMapping(acipConsonant2wylie, "KH", "kh"); + putMapping(acipConsonant2wylie, "G", "g"); + putMapping(acipConsonant2wylie, "NG", "ng"); + putMapping(acipConsonant2wylie, "C", "c"); + putMapping(acipConsonant2wylie, "CH", "ch"); + putMapping(acipConsonant2wylie, "J", "j"); + putMapping(acipConsonant2wylie, "NY", "ny"); + putMapping(acipConsonant2wylie, "T", "t"); + putMapping(acipConsonant2wylie, "TH", "th"); + putMapping(acipConsonant2wylie, "D", "d"); + putMapping(acipConsonant2wylie, "N", "n"); + putMapping(acipConsonant2wylie, "P", "p"); + putMapping(acipConsonant2wylie, "PH", "ph"); + putMapping(acipConsonant2wylie, "B", "b"); + putMapping(acipConsonant2wylie, "M", "m"); + putMapping(acipConsonant2wylie, "TZ", "ts"); + putMapping(acipConsonant2wylie, "TS", "tsh"); + putMapping(acipConsonant2wylie, "DZ", "dz"); + putMapping(acipConsonant2wylie, "W", "w"); + putMapping(acipConsonant2wylie, "ZH", "zh"); + putMapping(acipConsonant2wylie, "Z", "z"); + putMapping(acipConsonant2wylie, "'", "'"); + putMapping(acipConsonant2wylie, "Y", "y"); + putMapping(acipConsonant2wylie, "R", "r"); + putMapping(acipConsonant2wylie, "L", "l"); + putMapping(acipConsonant2wylie, "SH", "sh"); + putMapping(acipConsonant2wylie, "S", "s"); + putMapping(acipConsonant2wylie, "H", "h"); + putMapping(acipConsonant2wylie, "A", "a"); + putMapping(acipConsonant2wylie, "t", "T"); + putMapping(acipConsonant2wylie, "th", "Th"); + putMapping(acipConsonant2wylie, "d", "D"); + putMapping(acipConsonant2wylie, "n", "N"); + putMapping(acipConsonant2wylie, "sh", "Sh"); } return (String)acipConsonant2wylie.get(acip); } @@ -207,14 +245,14 @@ class ACIPRules { acipVowel2wylie = new HashMap(baseVowels.length * 4); for (int i = 0; i < baseVowels.length; i++) { - acipVowel2wylie.put(baseVowels[i][0], baseVowels[i][1]); - acipVowel2wylie.put('\'' + baseVowels[i][0], baseVowels[i][2]); 
- acipVowel2wylie.put(baseVowels[i][0] + 'm', baseVowels[i][1] + 'M'); - acipVowel2wylie.put('\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M'); - acipVowel2wylie.put(baseVowels[i][0] + ':', baseVowels[i][1] + 'H'); - acipVowel2wylie.put('\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H'); - acipVowel2wylie.put(baseVowels[i][0] + "m:", baseVowels[i][1] + "MH"); - acipVowel2wylie.put('\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH"); + putMapping(acipVowel2wylie, baseVowels[i][0], baseVowels[i][1]); + putMapping(acipVowel2wylie, '\'' + baseVowels[i][0], baseVowels[i][2]); + putMapping(acipVowel2wylie, baseVowels[i][0] + 'm', baseVowels[i][1] + 'M'); + putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M'); + putMapping(acipVowel2wylie, baseVowels[i][0] + ':', baseVowels[i][1] + 'H'); + putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H'); + putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH"); + putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH"); } } return (String)acipVowel2wylie.get(acip); @@ -228,27 +266,27 @@ class ACIPRules { acipOther2wylie = new HashMap(20); // DLC FIXME: check all these again. 
- acipOther2wylie.put(",", "/"); - acipOther2wylie.put(" ", " "); - acipOther2wylie.put(".", "*"); - acipOther2wylie.put("|", "|"); - acipOther2wylie.put("`", "!"); - acipOther2wylie.put(";", ";"); - acipOther2wylie.put("*", "@"); - acipOther2wylie.put("#", "@#"); - acipOther2wylie.put("%", "~X"); - acipOther2wylie.put("&", "&"); + putMapping(acipOther2wylie, ",", "/"); + putMapping(acipOther2wylie, " ", " "); + putMapping(acipOther2wylie, ".", "*"); + putMapping(acipOther2wylie, "|", "|"); + putMapping(acipOther2wylie, "`", "!"); + putMapping(acipOther2wylie, ";", ";"); + putMapping(acipOther2wylie, "*", "@"); + putMapping(acipOther2wylie, "#", "@#"); + putMapping(acipOther2wylie, "%", "~X"); + putMapping(acipOther2wylie, "&", "&"); - acipOther2wylie.put("0", "0"); - acipOther2wylie.put("1", "1"); - acipOther2wylie.put("2", "2"); - acipOther2wylie.put("3", "3"); - acipOther2wylie.put("4", "4"); - acipOther2wylie.put("5", "5"); - acipOther2wylie.put("6", "6"); - acipOther2wylie.put("7", "7"); - acipOther2wylie.put("8", "8"); - acipOther2wylie.put("9", "9"); + putMapping(acipOther2wylie, "0", "0"); + putMapping(acipOther2wylie, "1", "1"); + putMapping(acipOther2wylie, "2", "2"); + putMapping(acipOther2wylie, "3", "3"); + putMapping(acipOther2wylie, "4", "4"); + putMapping(acipOther2wylie, "5", "5"); + putMapping(acipOther2wylie, "6", "6"); + putMapping(acipOther2wylie, "7", "7"); + putMapping(acipOther2wylie, "8", "8"); + putMapping(acipOther2wylie, "9", "9"); } return (String)acipOther2wylie.get(acip); } @@ -465,39 +503,52 @@ class ACIPRules { /** Gets the duffcodes for vowel, such that they look good with * the stack with hash key hashKey, and appends them to r. */ - static void getDuffForACIPVowel(ArrayList r, DuffCode preceding, String vowel) { + static void getDuffForACIPVowel(ArrayList duff, DuffCode preceding, String vowel) { if (null == vowel) return; if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion! Use assert. 
throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly."); // Order matters here. + boolean context_added[] = new boolean[] { false }; if (vowel.startsWith("A")) { - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.WYLIE_aVOWEL); + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added); } else if (vowel.indexOf("'U") >= 0) { - TibTextUtils.getVowel(r, preceding, "U"); + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added); + } else if (vowel.indexOf("'I") >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added); } else { - if (vowel.indexOf('\'') >= 0) - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.A_VOWEL); - if (vowel.indexOf("EE") >= 0) - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.ai_VOWEL); - else if (vowel.indexOf('E') >= 0) - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.e_VOWEL); - if (vowel.indexOf("OO") >= 0) - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.au_VOWEL); - else if (vowel.indexOf('O') >= 0) - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.o_VOWEL); - if (vowel.indexOf('I') >= 0) - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.i_VOWEL); - if (vowel.indexOf('U') >= 0) - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.u_VOWEL); - if (vowel.indexOf('i') >= 0) - TibTextUtils.getVowel(r, preceding, THDLWylieConstants.reverse_i_VOWEL); + if (vowel.indexOf('\'') >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added); + } + if (vowel.indexOf("EE") >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added); + } else if (vowel.indexOf('E') >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added); + } + if (vowel.indexOf("OO") >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added); + } else if 
(vowel.indexOf('O') >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added); + } + if (vowel.indexOf('I') >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added); + } + if (vowel.indexOf('U') >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added); + } + if (vowel.indexOf('i') >= 0) { + TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added); + } } + // DLC FIXME: Use TMW9.61, the "o'i" special combination, when appropriate. - if (vowel.indexOf('m') >= 0) - r.add(TibetanMachineWeb.getGlyph("M")); + if (vowel.indexOf('m') >= 0) { + DuffCode last = (DuffCode)duff.get(duff.size() - 1); + duff.remove(duff.size() - 1); + TibTextUtils.getBindu(duff, last); + } if (vowel.indexOf(':') >= 0) - r.add(TibetanMachineWeb.getGlyph("H")); + duff.add(TibetanMachineWeb.getGlyph("H")); } } diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java index 1bdc019..e412215 100644 --- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java +++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java @@ -1,3 +1,4 @@ +// DLC NOW: KAsh ->Ksh here! optionally! /* The contents of this file are subject to the THDL Open Community License Version 1.0 (the "License"); you may not use this file except in compliance diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index a7be421..ba71f11 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -340,6 +340,22 @@ tstHelper("KA'", "[(K . A), (' . )]", new String[] { }, "{G+G}{YE}{S}"); + // DLC FIXME: warn about BDE vs. B+DE. color such differently. Maybe an inputter saw B+DE and typed in BDE, not thinking. 
+ tstHelper("BDE", "{B}{DE}", + new String[] { "{B}{DE}", "{B+DE}" }, + new String[] { "{B}{DE}" }, + "{B}{DE}"); + + tstHelper("SHR'I", "{SH}{R'I}", + null, + null, + "{SH+R'I}"); + + + // DLC FIXME: test EWTS {pouM} + + // DLC FIXME: do TMW->ACIP->TMW->ACIP round-trip. + tstHelper("DRUG", "{D}{RU}{G}", new String[] { "{D}{RU}{G}", "{D+RU}{G}" }, new String[] { "{D+RU}{G}" }, @@ -7302,6 +7318,7 @@ tstHelper("ZUR"); "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); uhelp("*#HUm: K+DHA GRO`;.,", "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") K+DHA IS ESSENTIALLY NOTHING.]\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); + // DLC FIXME: the file ACIP_SHRI should be made into an ACIP->TMW automated test case } /** Tests some more tsheg bars, these from Dr. Lacey's critical diff --git a/source/org/thdl/tib/text/ttt/TParseTree.java b/source/org/thdl/tib/text/ttt/TParseTree.java index 20ca8c0..9df8aa7 100644 --- a/source/org/thdl/tib/text/ttt/TParseTree.java +++ b/source/org/thdl/tib/text/ttt/TParseTree.java @@ -197,32 +197,42 @@ class TParseTree { * stack can take every prefix, which is not the case in * reality */ public TStackListList getUniqueParse(boolean noPrefixTests) { - TStackListList allLegalParses = new TStackListList(2); // save memory + // For Sanskrit+Tibetan: + TStackListList allNonillegalParses = new TStackListList(2); // save memory + // For Tibetan only: + TStackListList allStrictlyLegalParses = new TStackListList(2); // save memory + TStackListList legalParsesWithVowelOnRoot = new TStackListList(1); ParseIterator pi = getParseIterator(); while (pi.hasNext()) { TStackList sl = pi.next(); - BoolPair bpa = sl.isLegalTshegBar(noPrefixTests); - if (bpa.isLegal) { - if (bpa.isLegalAndHasAVowelOnRoot) + BoolTriple bt = sl.isLegalTshegBar(noPrefixTests); + if (bt.isLegal) { + if 
(bt.isLegalAndHasAVowelOnRoot) legalParsesWithVowelOnRoot.add(sl); - allLegalParses.add(sl); + if (!bt.isLegalButSanskrit) + allStrictlyLegalParses.add(sl); + allNonillegalParses.add(sl); } } if (legalParsesWithVowelOnRoot.size() == 1) return legalParsesWithVowelOnRoot; else { + if (allStrictlyLegalParses.size() == 1) + return allStrictlyLegalParses; + if (allStrictlyLegalParses.size() > 2) + throw new Error("can this happen?"); if (legalParsesWithVowelOnRoot.size() == 2) { if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size()) throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1)); return new TStackListList(legalParsesWithVowelOnRoot.get(1)); } - if (allLegalParses.size() == 2) { - if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size()) - throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1)); - return new TStackListList(allLegalParses.get(1)); + if (allNonillegalParses.size() == 2) { + if (allNonillegalParses.get(0).size() != 1 + allNonillegalParses.get(1).size()) + throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allNonillegalParses.get(0) + " ;; " + allNonillegalParses.get(1)); + return new TStackListList(allNonillegalParses.get(1)); } - return allLegalParses; + return allNonillegalParses; } } diff --git a/source/org/thdl/tib/text/ttt/TStackList.java b/source/org/thdl/tib/text/ttt/TStackList.java index 440a771..647ce54 100644 --- a/source/org/thdl/tib/text/ttt/TStackList.java +++ b/source/org/thdl/tib/text/ttt/TStackList.java @@ -121,16 +121,16 @@ class TStackList { * happen. 
*/ public ListIterator listIterator() { return al.listIterator(); } - /** Returns a pair with {@link BoolPair#isLegal} true if and only - * if this list of stacks is a legal tsheg bar by the rules of - * Tibetan syntax (sometimes called rules of spelling). If this - * is legal, then {@link BoolPair#isLegalAndHasAVowelOnRoot} will - * be true if and only if there is an explicit {A} vowel on the - * root stack. + /** Returns a triple with {@link BoolTriple#isLegal} true if and + * only if this list of stacks is a legal tsheg bar by the rules + * of Tibetan syntax (sometimes called rules of spelling). If + * this is legal, then {@link + * BoolTriple#isLegalAndHasAVowelOnRoot} will be true if and only + * if there is an explicit {A} vowel on the root stack. * @param noPrefixTests true if you want to pretend that every * stack can take every prefix, which is not the case in * reality */ - public BoolPair isLegalTshegBar(boolean noPrefixTests) { + public BoolTriple isLegalTshegBar(boolean noPrefixTests) { // DLC handle PADMA and other Tibetanized Sanskrit fellows consistently. Right now we only treat single-stack Sanskrit guys as legal. TTGCList tgcList = new TTGCList(this); @@ -162,7 +162,9 @@ class TStackList { } } } - return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot); + return new BoolTriple(isLegal, + (candidateType == "single-sanskrit-gc"), + isLegalAndHasAVowelOnRoot); } private static final boolean ddebug = false; @@ -232,11 +234,15 @@ class TStackList { } /** Too simple to comment. */ -class BoolPair { +class BoolTriple { boolean isLegal; + boolean isLegalButSanskrit; // some legal parses are legal only as Sanskrit -- the single Sanskrit stacks are this way, such as B+DE. 
boolean isLegalAndHasAVowelOnRoot; - BoolPair(boolean isLegal, boolean isLegalAndHasAVowelOnRoot) { + BoolTriple(boolean isLegal, + boolean isLegalButSanskrit, + boolean isLegalAndHasAVowelOnRoot) { this.isLegal = isLegal; + this.isLegalButSanskrit = isLegalButSanskrit; this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot; } }