Fixed ACIP->TMW conversion of vowels such as 'I.

Fixed ACIP->Unicode/TMW for BDE, which should be B-DE, not B+DE, because the former is legal Tibetan.

The ACIP->EWTS subroutine has been improved.

TMW->Wylie and TMW->ACIP are improved in error cases.

TMW->ACIP has friendly embedded error messages now.
This commit is contained in:
dchandler 2003-09-12 05:06:37 +00:00
parent 16817d0b8e
commit 115d0e0e6c
14 changed files with 689 additions and 472 deletions

View file

@ -345,18 +345,24 @@ public class DuffPane extends TibetanPane implements FocusListener {
* The keymap defines a default behavior for key presses
* in both Tibetan and Roman mode.
*/
private void setupKeymap() {
// Default action: hands the event's modifier mask and action command
// to performKeyStroke on the enclosing DuffPane.
Action defaultAction = new AbstractAction() {
public void actionPerformed(ActionEvent e) {
DuffPane.this.performKeyStroke(e.getModifiers(),
e.getActionCommand());
}
};
// Build the action table first, then create a keymap named
// "DuffBindings" (passing the current keymap as its parent), give it
// the default action above and make it this pane's keymap.
createActionTable(this);
Keymap keymap = addKeymap("DuffBindings", getKeymap());
keymap.setDefaultAction(defaultAction);
setKeymap(keymap);
}
private void setupKeymap() {
    // Fallback action installed as the keymap's default: it forwards the
    // event's modifier mask and action command to performKeyStroke.  Any
    // Throwable escaping that call is reported on stderr and the JVM is
    // terminated with exit status 1.
    Action fallbackAction = new AbstractAction() {
        public void actionPerformed(ActionEvent event) {
            try {
                DuffPane.this.performKeyStroke(event.getModifiers(),
                                               event.getActionCommand());
            } catch (Throwable failure) {
                System.err.println("JSKAD ERROR: " + failure);
                failure.printStackTrace(System.err);
                System.exit(1);
            }
        }
    };

    createActionTable(this);

    // New keymap named "DuffBindings", created with the pane's current
    // keymap as its parent argument, then made the active keymap.
    Keymap duffBindings = addKeymap("DuffBindings", getKeymap());
    duffBindings.setDefaultAction(fallbackAction);
    setKeymap(duffBindings);
}
private void createActionTable(JTextComponent textComponent) {
actions = new Hashtable();
@ -746,78 +752,82 @@ public class DuffPane extends TibetanPane implements FocusListener {
*
* @param v the vowel (in Wylie) you want to insert
*/
private void putVowel(String v) {
if (caret.getDot()==0) {
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v);
private void putVowel(String v) {
if (caret.getDot()==0) {
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v);
return;
}
return;
}
AttributeSet attr = getTibDoc().getCharacterElement(caret.getDot()-1).getAttributes();
String fontName = StyleConstants.getFontFamily(attr);
int fontNum;
AttributeSet attr = getTibDoc().getCharacterElement(caret.getDot()-1).getAttributes();
String fontName = StyleConstants.getFontFamily(attr);
int fontNum;
if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) {
try {
char c2 = getTibDoc().getText(caret.getDot()-1, 1).charAt(0);
int k = (int)c2;
if (k<32 || k>126) { //if previous character is formatting or some other non-character
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v);
if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) {
try {
char c2 = getTibDoc().getText(caret.getDot()-1, 1).charAt(0);
int k = (int)c2;
if (k<32 || k>126) { //if previous character is formatting or some other non-character
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v);
return;
}
return;
}
String wylie
String wylie
= TibetanMachineWeb.getWylieForGlyph(fontNum,
k,
TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot);
if (TibetanMachineWeb.isWyliePunc(wylie)) {
if (charList.isEmpty() && !TibetanMachineWeb.isAChenRequiredBeforeVowel()) {
printAChenWithVowel(v);
return;
}
}
if (TibetanMachineWeb.isWyliePunc(wylie)) {
if (charList.isEmpty() && !TibetanMachineWeb.isAChenRequiredBeforeVowel()) {
printAChenWithVowel(v);
return;
}
}
DuffCode dc_1 = null;
DuffCode dc_2 = new DuffCode(fontNum, c2);
DuffCode dc_1 = null;
DuffCode dc_2 = new DuffCode(fontNum, c2);
if (caret.getDot() >= 2) {
attr = getTibDoc().getCharacterElement(caret.getDot()-2).getAttributes();
fontName = StyleConstants.getFontFamily(attr);
if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) {
c2 = getTibDoc().getText(caret.getDot()-2, 1).charAt(0);
dc_1 = new DuffCode(fontNum, c2);
}
}
if (caret.getDot() >= 2) {
attr = getTibDoc().getCharacterElement(caret.getDot()-2).getAttributes();
fontName = StyleConstants.getFontFamily(attr);
if (0 != (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) {
c2 = getTibDoc().getText(caret.getDot()-2, 1).charAt(0);
dc_1 = new DuffCode(fontNum, c2);
}
}
java.util.List before_vowel = new ArrayList();
if (null != dc_1)
before_vowel.add(dc_1);
java.util.List before_vowel = new ArrayList();
if (null != dc_1)
before_vowel.add(dc_1);
before_vowel.add(dc_2);
java.util.List after_vowel = new ArrayList();
TibTextUtils.getVowel(after_vowel, dc_1, dc_2, v);
before_vowel.add(dc_2);
java.util.List after_vowel = new ArrayList();
try {
TibTextUtils.getVowel(after_vowel, dc_1, dc_2, v);
} catch (IllegalArgumentException e) {
// drop this vowel silently.
}
if (after_vowel.size() >= before_vowel.size()) {
setNumberOfGlyphsForLastVowel(after_vowel.size()
- before_vowel.size());
} else {
setNumberOfGlyphsForLastVowel(0);
ThdlDebug.noteIffyCode(); // I don't think this can ever happen, but...
// can happen for pou (as opposed to puo) (FIXME)
}
redrawGlyphs(before_vowel, after_vowel);
}
catch(BadLocationException ble) {
System.out.println("no--can't insert here");
redrawGlyphs(before_vowel, after_vowel);
}
catch(BadLocationException ble) {
System.out.println("no--can't insert here");
ThdlDebug.noteIffyCode();
}
}
else { //0 font means not Tibetan font, so begin new Tibetan font section
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v);
}
}
}
}
else { //0 font means not Tibetan font, so begin new Tibetan font section
if (!TibetanMachineWeb.isAChenRequiredBeforeVowel())
printAChenWithVowel(v);
}
}
/**
@ -840,14 +850,18 @@ public class DuffPane extends TibetanPane implements FocusListener {
*
* @param v the vowel (in Wylie) which you want to print with ACHEN
*/
private void printAChenWithVowel(String v) {
// Fetch the DuffCode array stored under ACHEN in the Tibetan hash and
// pick the entry at index TibetanMachineWeb.TMW.
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN);
DuffCode dc = dc_array[TibetanMachineWeb.TMW];
java.util.List achenlist = new ArrayList();
// Let getVowel fill achenlist, using the ACHEN glyph as the context for v.
TibTextUtils.getVowel(achenlist, dc, v);
// Convert the collected glyphs to DuffData and insert them at the caret.
DuffData[] dd = TibTextUtils.convertGlyphs(achenlist);
getTibDoc().insertDuff(caret.getDot(), dd);
}
private void printAChenWithVowel(String v) {
    // The DuffCode array stored under ACHEN; the entry at index
    // TibetanMachineWeb.TMW serves as the context glyph for the vowel.
    DuffCode[] achenForms
        = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN);
    DuffCode achen = achenForms[TibetanMachineWeb.TMW];

    java.util.List glyphs = new ArrayList();
    try {
        TibTextUtils.getVowel(glyphs, achen, v);
    } catch (IllegalArgumentException ignored) {
        // drop this vowel silently.
    }

    // Whatever getVowel managed to append is converted and inserted at
    // the current caret position.
    DuffData[] toInsert = TibTextUtils.convertGlyphs(glyphs);
    getTibDoc().insertDuff(caret.getDot(), toInsert);
}
/**
* Puts a bindu/anusvara at the current caret position.
@ -884,7 +898,8 @@ public class DuffPane extends TibetanPane implements FocusListener {
DuffCode dc = new DuffCode(fontNum, c2);
java.util.List beforecaret = new ArrayList();
beforecaret.add(dc);
java.util.List bindulist = TibTextUtils.getBindu(dc);
java.util.List bindulist = new LinkedList();
TibTextUtils.getBindu(bindulist, dc);
redrawGlyphs(beforecaret, bindulist);
initKeyboard();
return;
@ -895,7 +910,9 @@ public class DuffPane extends TibetanPane implements FocusListener {
}
}
DuffData[] dd = TibTextUtils.convertGlyphs(TibTextUtils.getBindu(null));
java.util.List binduList = new LinkedList();
TibTextUtils.getBindu(binduList, null);
DuffData[] dd = TibTextUtils.convertGlyphs(binduList);
getTibDoc().insertDuff(caret.getDot(), dd);
initKeyboard();
}

View file

@ -37,13 +37,13 @@ rgyal ba kun dngos mtsho skyes rdo rje bstan pa'i rtsa lag thams cad mkhyen pa z
bka' drin gzugs can dbyig 'dzin lto 'dir shong 'gyur min na kun mkhyen srang las gang gis gzhal//\par
\par
li khri'i lcug phran mkhyen pa'i snang ba can//\par
'jam mgon bloX. yi lang tsho bazaX.nga po'i tshon//\par
'jam mgon bloX yi lang tsho bazaX.nga po'i tshon//\par
kha dog so sor bkra ba'i gragaX.sa paX.'i rgyan//\par
phyogs bral rna lung 'god mkhas rtag tu rgyal//\f2\fs44\i0\b0\ul0\cf0\par
\par
\f1\fs28\i0\b0\ul0 dpal ldan chos kyi rang bzhin ngos yangs par//\par
gnas lnga'i bang mdzod 'byor par lhun grub pa'i//\par
mnga' sgyur bgrod byed ban+d+hu dziA wa ka/\par
mnga' sgyur bgrod byed ban+d+hu dzI wa ka/\par
rmad byung cod pan mchog tu rnam par bsngags//\par
\par
rgya hor lung dang rigs kyis mngon mtho zhing //\par
@ -77,7 +77,7 @@ deb ther rdzogs ldan gsar pa'i dga' ston mgron//\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de yang ston pa 'di nyid kyi gdung la nyi ma'i gnyen dang bu ram shing pa shAkya zhes brjod pa ni/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 yul gru 'dzin du rgyal po brgya tham pa byung ba'i mtha' ma rna ba can gyi bu gau ta ma dang b+ha ra d+h+wa dza gnyis las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sngon mas rab tu byung ste lo ma'i spyil po yid du 'ong ba'i bsam gtan gyi khang bur gnas pa'i tshe/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 g.yo can pad+ma'i rtsa lag dang bzang mo dga' mgur spyod pa'i dus las yol bas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bzang mo bas.d pa'i ral gri khrag can lo ma'i spyil po'i nye 'dabs su bor ba'i rkyen gyis gau ta ma bsal shing la bskyon pa'i dus/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drang srong mdog nag gi rdzu 'phrul gyis gru char gyi thigs pa dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dri bzhon lus la reg pa'i rkyen gyis 'dod pa'i gnas rjes su dran pa'i yid kyi shing rtas drangs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 khams kyi dwangs ma gnyis lhan cig tu lhung ba las sgo nga gnyis su gyur pa nyi 'od kyis bskyangs shing khye'u gnyis bu ram gyi shing gseb tu zhugs pas ming de ltar du grags la/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bu ram shing pa'i brgyud la rgyal po brgya byung ba'i mtha' ma 'phags skyes po/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'phags skyes po la sras bzhi byung yang btsun mo 'das te slar stobs ldan gyi rgyal po gzhan zhig gi bu mo khab tu blangs par sras byung na rgyal srid du dbang bskur bar khas blangs pas dam bcas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sras sngon ma rnams spyugs pa rang rang gi sring mo dang bcas te chu bo skal ldan shing rta'i 'gram du ming sring lhan cig tu 'dus pa las shAkya'i brgyud ces grags pa yin no//\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de yang kun mkhyen nyi ma'i gnyen 'di nyid 'khrungs lor mkhas grub rnams kyi bzhed tshul mang du mchis kyang don grub ces pa sa lug dbyar zla 'bring po'i chu stod kyi nya ba'i tshes bco lnga'i dus su/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 yum gso sbyong la gnas pa'i lhums su glang po che'i phrug gu thal dkar gyi rnam par gyur nas zhugs/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 zla ba bcu phal cher lon pa legs par sbyar ba lha'i skad du ru dra zhes pa bsil ldan pa rnams drag por brjod pa/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya nag pa dbang thang dang bstun pa'i ming ging shing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bod yul du lcags pho spre'ura 'bod pa'i dpyid tha sa gas nya ba me tog can gyi zla ba/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya nag pa si yol du brjod pa'i yar ngo'i tshes bdun gyi res gza' 'od zer bdun pa dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 las skar rgyal la babs pa na lum+bi ni'i nags su sku bltams/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de nas lang tsho'i kun+da rnam par rgyas pa'i zil mngar gyi sgyu rtsal drug cu rtsa bzhi'i yal 'dab kyi yon tan kun la rang gir bgyis shing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 pad+mo'i snyems pa thogs pa'i ri dwags mig can gyi don yongs su tshang ba'i grags 'dzin ma dang sa mtsho ma sogs btsun mor dbang bskur te lo nyi shu rtsa dgu'i bar du nyi 'og gyi rgyal phran gnyis 'thung gi spyi bor 'jigs pa med pa'i rgyal thabs kyi bya ba'i skad gsang mthon pos rang byan du 'gying bar dbang thob na'ang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mchog gi sprul sku'i mdzad pa ston pa'i ched du/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dgung lo nyer dgu pa kun 'dzin zhes pa sa pho byi ba la rang byung gi sgo nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rab byung gi dngos po yongs su rdzogs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 lo drug tu dka' ba spyad pas lang tsho phun sum tshogs pa las gzhan du gyur pa na/\f3\fs44\i0\b0\ul0\cf0 
\f1\fs28\i0\b0\ul0 dga' mo dang dga' stobs mas ba stong gi 'o ma lan bcu drug gi bar du nying khur byas pa'i 'o thug sbrang rtsi dang sbyar ba'i kun tu bzang po'i mchod sprin gyis sku su war+Na'i mchod sdong nyi gzhon 'bum gyis 'khyud pa ltar gyur te rdo rje gdan du byang chub kyi shing drung na 'chi med bdag pos rtswa 'tshong gyi rnam par sprul te phul ba'i rtswa'i gdan la bzhugs nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 chu srin rgyal mtshan can sde dang bcas pa'i g.yul las rnam par rgyal ba'i ba dan nam mkha'i mtha' klas par bsgrengs te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 dgung lo so lnga pa rgyal ba zhes pa shing rta'i sa gas nya ba'i bco lnga'i skya rengs shar ba na ye shes mchog gi bdud rtsi mngon du mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /mdo las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de'i nyin mo zla ba sgra gcan gyis bzung ba dang /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 sgra gcan 'dzin dang bdud rtsi zas kyi khye'u zhig kyang skyes par bshad pa'i gza' 'dzin gyi ri mo ni gza' gnas su gcig chu tshod so brgyad zla skar gyi skar gnas su bcu drug chu tshod stong pa sgra gcan gdong gi skar mar bcu drug chu tshod nyer dgu byung bas gza' 'dzin gyi ri mo ni tshad ma'i aA dar+sha gtsang ma'i ngos su gsal bar shar ba yin no/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de nas zhag zhe dgu'i bar du dam pa'i chos kyi bdud rtsi'i sgo brgya ma phye ba'i tshul bstan pas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'jig rten gyi byed po gser mngal can gyis skal pa mchog gi bzang po'i dpal dang ldan pa'i don du gsol ba la brten nas/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mchog dman gyi gdul bya so so'i blo dang 'tsham par zab rgyas chos kyi sgo glegs rnam par bkral nas theg pa che chung gi rigs can gyi yol go/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rang 'dod pa'i tshogs mtha' dag gcig tu 'jo ba'i ro zad mi shes pa mkha' khyab tu bro ba'i 'khor lo bskor te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 mthar gyad kyi yul rtswa mchog gi grong du dgung lo gya gcig pa dpa' bo zhes pa lcags 'brug gi lor mi 'gyur ba'i bde ba chen po chos kyi sku'i mkha' klong du sna tshogs pa'i sprul sku'i zlos gar gyis rol pa'i chu 'dzin gzugs sku tha mi dad pa'i mdzad pa bstan to/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar bstan pa rin po che yul dbus 'gyur nas char dus kyi dal 'gro'i klung rang babs su 'gyur ba'i gshis lugs bzhin tha grur khyab pa las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'gro ba'i bla ma shAkya seng ge gya gcig pa lcags 'brug lo nag nyar shriA d+hA n+ya ka Ta kar gsang ba kun gyi gan mdzod rnam par 'dzin pa'i mi'i dbang phyug zla ba bzang por sho lo ka stong phrag bcu gnyis kyi bdag nyid can gyi dpal dang po'i sangs rgyas dus kyi 'khor lo'i rgyud gsungs pa sham+b+ha lar spyan drangs nas ma la ya'i skyed tshal du sku gsungs thugs yongs su rdzogs pa'i dkyil 'khor gyi snang brnyan ye shes kyi gzi byin mngon par 'bar ba'i sgyu ma'i gar gyis rtsen pa'i 'dren byed kyi dga' ston gsar pa bskrun te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rdo rje theg pa'i bgo skal ka lA pa'i grong khyer gyi skye rgur 'gyed pa'i mu mtha' bral ba'i mdzad pa nas bzung 'jigs pa med pa'i gdong lnga'i khri la bgrod byed rtsibs stong 'khor lo'i ri mo mchog tu bkra ba'i chos rgyal rigs ldan sum cu rtsa gsum du byon pa mtha' dag gis phyi nang gzhan gsum sbyor ba'i gsang chen nA da'i sgra dbyangs kyi nor bu'i 'phyang 'phrul las ldan gyi rna bar spud par mdzad pa'i tha ma/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drag po'i 'khor lo can gyis dgung lo dgu bcu rtsa brgyad par kla klo'i dpung tshogs mtha' dag ming gi lhag mar byas nas snga na med pas sa chen po'i khor yug kun tu khyab par mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /de yang rigs ldan drag po dgung lo lnga bcu'i steng du kla klo bcom par bzhed pa ltar na/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 kla klo'i gnas tshad stong dang brgyad brgyar bshad pa las lo grangs bzhi bcu rtsa bdun tsam mi 'grig pa'i skyon yod cing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de dag rags rtsis su 'chad na zhib rtsis ni 'byung ba'i skabs med pas gzur gnas kyi shes ldan rnams soms shig\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar sham b+ha las mtshon te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 
ao rgyan/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bal yul/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya hor sogs su bstan pa byung tshul ni mtha' yas kyang mang gis dogs pas ma spros so/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par
\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar bstan pa rin po che yul dbus 'gyur nas char dus kyi dal 'gro'i klung rang babs su 'gyur ba'i gshis lugs bzhin tha grur khyab pa las/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 'gro ba'i bla ma shAkya seng ge gya gcig pa lcags 'brug lo nag nyar shrI d+hA n+ya ka Ta kar gsang ba kun gyi gan mdzod rnam par 'dzin pa'i mi'i dbang phyug zla ba bzang por sho lo ka stong phrag bcu gnyis kyi bdag nyid can gyi dpal dang po'i sangs rgyas dus kyi 'khor lo'i rgyud gsungs pa sham+b+ha lar spyan drangs nas ma la ya'i skyed tshal du sku gsungs thugs yongs su rdzogs pa'i dkyil 'khor gyi snang brnyan ye shes kyi gzi byin mngon par 'bar ba'i sgyu ma'i gar gyis rtsen pa'i 'dren byed kyi dga' ston gsar pa bskrun te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rdo rje theg pa'i bgo skal ka lA pa'i grong khyer gyi skye rgur 'gyed pa'i mu mtha' bral ba'i mdzad pa nas bzung 'jigs pa med pa'i gdong lnga'i khri la bgrod byed rtsibs stong 'khor lo'i ri mo mchog tu bkra ba'i chos rgyal rigs ldan sum cu rtsa gsum du byon pa mtha' dag gis phyi nang gzhan gsum sbyor ba'i gsang chen nA da'i sgra dbyangs kyi nor bu'i 'phyang 'phrul las ldan gyi rna bar spud par mdzad pa'i tha ma/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 drag po'i 'khor lo can gyis dgung lo dgu bcu rtsa brgyad par kla klo'i dpung tshogs mtha' dag ming gi lhag mar byas nas snga na med pas sa chen po'i khor yug kun tu khyab par mdzad do/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /de yang rigs ldan drag po dgung lo lnga bcu'i steng du kla klo bcom par bzhed pa ltar na/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 kla klo'i gnas tshad stong dang brgyad brgyar bshad pa las lo grangs bzhi bcu rtsa bdun tsam mi 'grig pa'i skyon yod cing /\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de dag rags rtsis su 'chad na zhib rtsis ni 'byung ba'i skabs med pas gzur gnas kyi shes ldan rnams soms shig\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 de ltar sham b+ha las mtshon te/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 
ao rgyan/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 bal yul/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 rgya hor sogs su bstan pa byung tshul ni mtha' yas kyang mang gis dogs pas ma spros so/\f3\fs44\i0\b0\ul0\cf0 \f1\fs28\i0\b0\ul0 /\par
\f4\fs24\i0\b0\ul0\cf0\par
\pard\ql\f1\fs28\i0\b0\ul0\par
\fs24\par

View file

@ -171,10 +171,10 @@ public class TibetanConverter implements FontConverterConstants {
out.println("");
out.println("");
out.println(" In --to... and --acip-to... modes, needs one argument, the name of the");
out.println(" TibetanMachineWeb RTF");
out.println(" file (for --to-wylie, --to-unicode, and --to-tibetan-machine) or the name of");
out.println(" TibetanMachineWeb RTF file (for --to-wylie, --to-wylie-text, --to-acip-text,");
out.println(" --to-acip, --to-unicode, and --to-tibetan-machine) or the name of");
out.println(" the TibetanMachine RTF file (for --to-tibetan-machine-web) or the name of the");
out.println(" ACIP text file (for --acip-to-unicode). Writes the");
out.println(" ACIP text file (for --acip-to-unicode or --acip-to-tmw). Writes the");
out.println(" result to standard output (after dealing with the curly brace problem if");
out.println(" the input is TibetanMachineWeb). Exit code is zero on success, 42 if some");
out.println(" glyphs couldn't be converted (in which case the output is just those glyphs),");
@ -364,14 +364,14 @@ public class TibetanConverter implements FontConverterConstants {
+ ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0)
== 1);
long numAttemptedReplacements[] = new long[] { 0 };
if (TMW_TO_WYLIE == ct) {
if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) {
// Convert to THDL Wylie:
if (!tdoc.toWylie(0,
tdoc.getLength(),
numAttemptedReplacements)) {
exitCode = 44;
}
} else if (TMW_TO_ACIP == ct) {
} else if (TMW_TO_ACIP == ct || TMW_TO_ACIP_TEXT == ct) {
// Convert to ACIP:
if (!tdoc.toACIP(0,
tdoc.getLength(),
@ -411,7 +411,10 @@ public class TibetanConverter implements FontConverterConstants {
// Write to standard output the result:
if (TMW_TO_WYLIE_TEXT == ct || TMW_TO_ACIP_TEXT == ct) {
try {
tdoc.writeTextOutput(new BufferedWriter(new OutputStreamWriter(out)));
BufferedWriter bw
= new BufferedWriter(new OutputStreamWriter(out));
tdoc.writeTextOutput(bw);
bw.flush();
} catch (IOException e) {
exitCode = 40;
}

View file

@ -44,11 +44,18 @@ public class Manipulate
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
}
/** Returns null on error. */
public static String wylieToAcip(String palabra)
{
// DLC FIXME: for unknown things, return null.
if (palabra.equals("@#")) return "*";
if (palabra.equals("@#")) return "#";
if (palabra.equals("!")) return "`";
if (palabra.equals("b+h")) return "BH";
if (palabra.equals("d+h")) return "DH";
if (palabra.equals("X")) return null;
if (palabra.equals("iA")) return null;
if (palabra.equals("ai")) return "EE";
if (palabra.equals("au")) return "OO";
if (palabra.equals("$")) return null;
if (palabra.startsWith("@") || palabra.startsWith("#"))
return null; // we can't convert this in isolation! We need context.
char []caract;
@ -93,7 +100,7 @@ public class Manipulate
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
nuevaPalabra = replace(nuevaPalabra, "/", ",");
nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra;
}

View file

@ -168,7 +168,7 @@ public final class DuffCode {
* recursion (manifesting as a StackOverflowError)) */
public String toString(boolean TMW) {
boolean[] err = new boolean[] { false };
return "<duffcode font="
return "<glyph font="
+ (TMW
? TibetanMachineWeb.tmwFontNames
: TibetanMachineWeb.tmFontNames)[fontNum]

View file

@ -73,8 +73,14 @@ public class TGCPair {
vowelWylie = null;
}
/** Convenience overload: returns the same result as getWylie(false). */
public String getWylie() {
return getWylie(false);
}
public String getWylie(boolean appendaged) {
StringBuffer b = new StringBuffer();
if (consonantWylie != null) {
if (appendaged && !"'".equals(consonantWylie))
b.append("a"); // pa'am... we want 'am, not 'm; 'ang, not 'ng.
// we may have {p-y}, but the user wants to see {py}.
for (int i = 0; i < consonantWylie.length(); i++) {
char ch = consonantWylie.charAt(i);
@ -87,26 +93,35 @@ public class TGCPair {
return b.toString();
}
/** Convenience overload: returns the same result as getACIP(false). */
public String getACIP() {
return getACIP(false);
}
public String getACIP(boolean appendaged) {
// DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
StringBuffer b = new StringBuffer();
if (consonantWylie != null) {
String consonantACIP // DLC FIXME can KAsh occur?
= org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie);
if (null == consonantACIP) throw new Error("how?");
// System.out.println("DLC: Wylie=" + consonantWylie + ", ACIP=" + consonantACIP);
// we may have {P-Y}, but the user wants to see {PY}.
for (int i = 0; i < consonantACIP.length(); i++) {
char ch = consonantACIP.charAt(i);
if ('-' != ch)
b.append(ch);
String consonantACIP
= org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(consonantWylie);
if (null == consonantACIP) {
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie);
} else {
if (appendaged && !"'".equals(consonantWylie))
b.append("A"); // PA'AM
// we may have {P-Y}, but the user wants to see {PY}.
for (int i = 0; i < consonantACIP.length(); i++) {
char ch = consonantACIP.charAt(i);
if ('-' != ch)
b.append(ch);
}
}
}
if (vowelWylie != null) {
String vowelACIP // DLC FIXME look for exceptions
= org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie);
// System.out.println("DLC: Wylie=" + vowelWylie + ", ACIP=" + vowelACIP);
if (null == vowelACIP) throw new Error("how?");
b.append(vowelACIP);
String vowelACIP
= org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(vowelWylie);
if (null == vowelACIP) {
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + vowelWylie);
} else {
b.append(vowelACIP);
}
}
return b.toString();
}
@ -150,6 +165,12 @@ public class TGCPair {
}
this.consonantWylie = consonantWylie;
if (null != vowelWylie) {
if (vowelWylie.equals("iA") || vowelWylie.equals("Ai"))
vowelWylie = "I";
if (vowelWylie.equals("uA") || vowelWylie.equals("Au"))
vowelWylie = "U";
}
this.vowelWylie = vowelWylie;
this.classification = realClassification;
}

View file

@ -360,7 +360,7 @@ public class TibTextUtils implements THDLWylieConstants {
else
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
glyphs.addAll(getBindu(dc));
getBindu(glyphs, dc);
}
else {
@ -477,303 +477,366 @@ public class TibTextUtils implements THDLWylieConstants {
}
/**
* Gets the bindu sequence for a given context.
* In the TibetanMachineWeb fonts, bindu (anusvara) is realized
* differently depending on which vowel it attaches to. Although
* the default bindu glyph is affixed to consonants and subscript vowels,
* for superscript vowels (i, e, o, etc), there is a single glyph
* which merges the bindu and that vowel together. When you pass this
* method a glyph context, it will return a List of glyphs which
* will either consist of the original glyph followed by the default
* bindu glyph, or a composite vowel+bindu glyph.
* Note that there is only one glyph in the context. This means that
* bindus will not affix properly if superscript vowels are allowed to directly
* precede subscript vowels (e.g. pou).
* @param dc the DuffCode of the glyph you
* want to attach a bindu to
* @return a List of DuffCode glyphs that include the
* original dc, as well as a bindu
*/
public static List getBindu(DuffCode dc) {
List bindus = new ArrayList();
// No context glyph: the result is just the glyph looked up for BINDU.
if (null == dc) {
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
return bindus;
}
// dc is not a key of the bindu map: keep dc and follow it with the
// BINDU glyph.
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
bindus.add(dc);
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
return bindus;
}
// dc is a key of the bindu map: the mapped glyph alone is the result.
bindus.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
return bindus;
}
* Gets the bindu sequence for a given context. In the
* TibetanMachineWeb fonts, bindu (anusvara) is realized differently
* depending on which vowel it attaches to. Although the default bindu
* glyph is affixed to consonants and subscript vowels, for superscript
* vowels (i, e, o, etc), there is a single glyph which merges the
* bindu and that vowel together. When you pass this method a glyph
* context and a list, it will append to that list glyphs which will either consist
* of the original glyph followed by the default bindu glyph, or a
* composite vowel+bindu glyph. Note that there is only one glyph in
* the context. This means that bindus will not affix properly if
* superscript vowels are allowed to directly precede subscript vowels
* (e.g. pou).
* @param list a List of DuffCode glyphs to which will be appended the
* original dc (if non-null) as well as a bindu, or the one glyph that
* represents both
* @param dc the DuffCode of the glyph you want to attach a bindu to,
* or null */
public static void getBindu(List list, DuffCode dc) {
    // No context glyph: append only the glyph looked up for BINDU.
    if (dc == null) {
        list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
        return;
    }

    // The context glyph is a key of the bindu map: append the single
    // mapped glyph in place of dc.
    if (TibetanMachineWeb.getBinduMap().containsKey(dc)) {
        list.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
        return;
    }

    // Otherwise append the context glyph followed by the BINDU glyph.
    list.add(dc);
    list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
}
/**
* Gets the vowel sequence for a given vowel in a given context.
* Given a context, this method affixes a vowel and returns the
* context plus the vowel. Generally, it is enough to provide just
* one glyph for context.
* Gets the vowel sequence for a given vowel in a given context. Given
* a context, this method affixes a vowel and returns the context (iff
* context_added[0] is false) plus the vowel. Generally, it is enough
* to provide just one glyph for context.
* @param context the glyph preceding the vowel you want to affix
* @param vowel the vowel you want to affix, in Wylie
* @param context_added an array of one boolean, an input/output
* parameter that, if true, means that only the vowel will be added to
* l, not the context, and if false, means that the context and the
* vowel will be added and that context_added[0] will be updated to be
* true
* @return a List of glyphs equal to the vowel in context
*/
* @throws IllegalArgumentException if the given combination is not
* supported */
public static void getVowel(List l, DuffCode context, String vowel, boolean context_added[]) {
// Delegates to the five-argument overload: null is passed as context_1
// (no glyph two positions back) and context is passed as context_2.
getVowel(l, null, context, vowel, context_added);
}
/** Wrapper that calls for adding context to l. */
public static void getVowel(List l, DuffCode context, String vowel) {
getVowel(l, null, context, vowel);
getVowel(l, null, context, vowel, new boolean[] { false });
}
/**
 * Wrapper that calls for adding context to l: delegates to the
 * five-argument getVowel with a fresh one-element flag array whose
 * element is false.
 */
public static void getVowel(List l, DuffCode context_1, DuffCode context_2, String vowel) {
getVowel(l, context_1, context_2, vowel, new boolean[] { false });
}
/**
* Gets the vowel sequence for a given vowel in a given context and
* appends it to l. Given a context, this method affixes a vowel and
* appends the context plus the vowel to l. Since the choice of vowel
* glyph depends on the consonant to which it is attached, generally it
* is enough to provide just the immediately preceding
* context. However, in some cases, double vowels are allowed - for
* example 'buo'. To find the correct glyph for 'o', we need 'b' in
* this case, not 'u'. Note also that some Extended Wylie vowels
* correspond to multiple glyphs in TibetanMachineWeb. For example, the
* vowel I consists of both an achung and a reverse gigu. All required
* glyphs are appended to l.
* appends the context (iff context_added[0] is false) plus the vowel
* to l. Since the choice of vowel glyph depends on the consonant to
* which it is attached, generally it is enough to provide just the
* immediately preceding context. However, in some cases, double vowels
* are allowed - for example 'buo'. To find the correct glyph for 'o',
* we need 'b' in this case, not 'u'. Note also that some Extended
* Wylie vowels correspond to multiple glyphs in TibetanMachineWeb. For
* example, the vowel I consists of both an achung and a reverse
* gigu. All required glyphs are appended to l.
* @param context_1 the glyph occurring two glyphs before the vowel you
* want to affix
* @param context_2 the glyph immediately before the vowel you want to
* affix
* @param vowel the vowel you want to affix, in Wylie */
* @param vowel the vowel you want to affix, in Wylie
* @param context_added an array of one boolean, an input/output
* parameter that, if true, means that only the vowel will be added to
* l, not the context, and if false, means that the context and the
* vowel will be added and that context_added[0] will be updated to be
* true
* @throws IllegalArgumentException if the given combination is not
* supported */
public static void getVowel(List l, DuffCode context_1, DuffCode context_2, String vowel) {
//this vowel doesn't correspond to a glyph -
//so you just return the original context
public static void getVowel(List l, DuffCode context_1, DuffCode context_2,
String vowel, boolean context_added[])
throws IllegalArgumentException
{
//this vowel doesn't correspond to a glyph -
//so you just return the original context
if ( vowel.equals(WYLIE_aVOWEL) ||
TibetanMachineWeb.isTopVowel(context_2)) {
if (context_1 != null)
l.add(context_1);
if (vowel.equals(WYLIE_aVOWEL)
|| TibetanMachineWeb.isTopVowel(context_2)) {
if (TibetanMachineWeb.isTopVowel(context_2))
throw new IllegalArgumentException("dropping vowels is bad");
if (!context_added[0]) {
context_added[0] = true;
if (context_1 != null)
l.add(context_1);
l.add(context_2);
return;
}
l.add(context_2);
}
return;
}
//first, the three easiest cases: ai, au, and <i
//these vowels have one invariant form - therefore,
//dc_context is just returned along with that form
//first, the three easiest cases: ai, au, and <i
//these vowels have one invariant form - therefore,
//dc_context is just returned along with that form
if (vowel.equals(ai_VOWEL)) {
if (context_1 != null)
l.add(context_1);
if (vowel.equals(ai_VOWEL)) {
if (!context_added[0]) {
context_added[0] = true;
if (context_1 != null)
l.add(context_1);
l.add(context_2);
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL);
l.add(dc_v[TibetanMachineWeb.TMW]);
return;
}
l.add(context_2);
}
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(ai_VOWEL);
l.add(dc_v[TibetanMachineWeb.TMW]);
return;
}
if (vowel.equals(au_VOWEL)) {
if (context_1 != null)
l.add(context_1);
if (vowel.equals(au_VOWEL)) {
if (!context_added[0]) {
context_added[0] = true;
if (context_1 != null)
l.add(context_1);
l.add(context_2);
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL);
l.add(dc_v[TibetanMachineWeb.TMW]);
return;
}
l.add(context_2);
}
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(au_VOWEL);
l.add(dc_v[TibetanMachineWeb.TMW]);
return;
}
if (vowel.equals(reverse_i_VOWEL)) {
if (context_1 != null)
l.add(context_1);
if (vowel.equals(reverse_i_VOWEL)) {
if (!context_added[0]) {
context_added[0] = true;
if (context_1 != null)
l.add(context_1);
l.add(context_2);
l.add(context_2);
}
if (!TibetanMachineWeb.isTopVowel(context_2)) {
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
l.add(dc_v[TibetanMachineWeb.TMW]);
} else throw new IllegalArgumentException("dropping vowels is bad");
if (!TibetanMachineWeb.isTopVowel(context_2)) {
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
l.add(dc_v[TibetanMachineWeb.TMW]);
}
return;
}
return;
}
//second, the vowels i, e, and o
//these vowels have many different glyphs each,
//whose correct selection depends on the
//preceding context. therefore, dc_context is
//returned along with the vowel appropriate to
//that context
//second, the vowels i, e, and o
//these vowels have many different glyphs each,
//whose correct selection depends on the
//preceding context. therefore, dc_context is
//returned along with the vowel appropriate to
//that context
if (vowel.equals(i_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
}
if (vowel.equals(i_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
}
if (!context_added[0]) {
context_added[0] = true;
if (context_1 != null)
l.add(context_1);
if (context_1 != null)
l.add(context_1);
l.add(context_2);
}
if (null != dc_v)
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
l.add(context_2);
return;
}
// DLC perfect TMW->Wylie wouldn't produce o'i for an input file containing merely TMW9.61 -- it would produce \u0f7c,\u0f60,\u0f72 -- round-trip shows why.
if (null != dc_v)
l.add(dc_v);
if (vowel.equals(e_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
}
return;
}
if (!context_added[0]) {
context_added[0] = true;
if (context_1 != null)
l.add(context_1);
if (vowel.equals(e_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
}
l.add(context_2);
}
if (null != dc_v)
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
if (context_1 != null)
l.add(context_1);
return;
}
l.add(context_2);
if (vowel.equals(o_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
}
if (null != dc_v)
l.add(dc_v);
if (!context_added[0]) {
context_added[0] = true;
if (context_1 != null)
l.add(context_1);
return;
}
l.add(context_2);
}
if (vowel.equals(o_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
if (null == dc_v && null != context_1) {
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
}
if (null != dc_v)
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
if (context_1 != null)
l.add(context_1);
return;
}
l.add(context_2);
//next come the vowels u, A, and U
//these three vowels are grouped together because they all
//can cause the preceding context to change. in particular,
//both u and A cannot be affixed to ordinary k or g, but
//rather the shortened versions of k and g - therefore,
if (null != dc_v)
l.add(dc_v);
if (vowel.equals(u_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
return;
}
if (!context_added[0]) {
context_added[0] = true;
if (null != context_1)
l.add(context_1);
//next come the vowels u, A, and U
//these three vowels are grouped together because they all
//can cause the preceding context to change. in particular,
//both u and A cannot be affixed to ordinary k or g, but
//rather the shortened versions of k and g - therefore,
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
}
if (null != dc_v)
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
if (vowel.equals(u_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
return;
}
if (null != context_1)
l.add(context_1);
if (vowel.equals(A_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
if (!context_added[0]) {
context_added[0] = true;
if (null != context_1)
l.add(context_1);
if (null != dc_v)
l.add(dc_v);
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
}
if (null != dc_v)
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
return;
}
return;
}
if (vowel.equals(A_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
if (vowel.equals(U_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
if (null != context_1)
l.add(context_1);
if (!context_added[0]) {
context_added[0] = true;
if (null != context_1)
l.add(context_1);
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
}
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad");
if (null != dc_v)
return;
}
l.add(dc_v);
//finally, the vowels I and <I
//these vowels are unique in that they both
//require a change from the previous character,
//and consist of two glyphs themselves
return;
}
if (vowel.equals(I_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
if (vowel.equals(U_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
if (!context_added[0]) {
context_added[0] = true;
if (null != context_1)
l.add(context_1);
if (null != context_1)
l.add(context_1);
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
}
if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub);
l.add(dc_v_sup);
} else throw new IllegalArgumentException("dropping vowels is bad");
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
return;
}
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
l.add(dc_v);
if (vowel.equals(reverse_I_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
return;
}
if (!context_added[0]) {
context_added[0] = true;
if (null != context_1)
l.add(context_1);
//finally, the vowels I and <I
//these vowels are unique in that they both
//require a change from the previous character,
//and consist of two glyphs themselves
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
}
if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub);
l.add(dc_v_sup);
} else throw new IllegalArgumentException("dropping vowels is bad");
if (vowel.equals(I_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
return;
}
if (null != context_1)
l.add(context_1);
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub);
l.add(dc_v_sup);
}
return;
}
if (vowel.equals(reverse_I_VOWEL)) {
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
if (null != context_1)
l.add(context_1);
if (null == halfHeight)
l.add(context_2);
else
l.add(halfHeight);
if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub);
l.add(dc_v_sup);
}
return;
}
throw new Error("DLC can this happen? " + vowel);
}
throw new IllegalArgumentException("bad vowel " + vowel);
}
/**
* True if you want TibetanMachineWeb-to-Extended-Wylie conversion
@ -844,7 +907,7 @@ public class TibTextUtils implements THDLWylieConstants {
// DLC FIXME: {H}, U+0F7F, is part of a grapheme cluster!
// David Chapman and I both need a comprehensive list of these
// guys.
// guys. Get it from Unicode 4.0 spec?
/** Scans the glyphs in glyphList and creates the returned list of
grapheme clusters based on them. A grapheme cluster is a
consonant or consonant stack with optional adornment or a
@ -889,6 +952,11 @@ public class TibTextUtils implements THDLWylieConstants {
} else if (TibetanMachineWeb.isWylieAdornmentAndContainsVowel(wylie)
|| TibetanMachineWeb.isWylieAdornment(wylie)) {
buildingUpVowel.append(wylie);
// DLC FIXME: I bet three or four vowels together
// breaks TMW->ACIP and TMW->EWTS. Test it. When it
// does, revamp TGCPair to have a set of vowels. The
// output order should be consistent with the
// Unicode-imposed order on vowels.
} else {
// number or weird thing:
@ -1134,12 +1202,6 @@ public class TibTextUtils implements THDLWylieConstants {
if (isAppendageNonVowelWylie(wylie)) {
candidateType
= candidateType.substring("maybe-".length()).intern();
// So that we get 'am, not 'm; 'ang, not 'ng:
// FIXME: kludge: weird place to do this.
// pa'am, not pa'm is what we want, sure,
// but doing this here is ugly.
tp.setWylie(WYLIE_aVOWEL + tp.getWylie());
} else {
if (null != warnings)
warnings.append("Found a tsheg bar that has an achung (" + ACHUNG + ") tacked on, followed by some other thing whose wylie is " + wylie + "\n");
@ -1264,7 +1326,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
} else {
} else if (i + 1 < sz) {
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
&& TGCPair.SANSKRIT_WITH_VOWEL != cls)
translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
@ -1277,7 +1339,8 @@ public class TibTextUtils implements THDLWylieConstants {
int leftover = sz + 1;
// Appendaged vs. not appendaged? it affects nothing at
// this stage.
// this stage except for pa'm vs. pa'am.
boolean appendaged = (candidateType.startsWith("appendaged-"));
candidateType = getCandidateTypeModuloAppendage(candidateType);
if ("prefix/root-root/suffix-suffix/postsuffix" == candidateType) {
@ -1433,7 +1496,9 @@ public class TibTextUtils implements THDLWylieConstants {
// append the wylie/ACIP left over:
for (int i = leftover; i < sz; i++) {
TGCPair tp = (TGCPair)gcs.get(i);
translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP());
translitBuffer.append(EWTSNotACIP
? tp.getWylie(appendaged)
: tp.getACIP(appendaged));
}
}
}
@ -1468,6 +1533,7 @@ public class TibTextUtils implements THDLWylieConstants {
ArrayList glyphList = new ArrayList();
StringBuffer translitBuffer = new StringBuffer();
// DLC FIXME: " " should become " ", and test with ACIP # and *.
for (int i=0; i<dcs.length; i++) {
char ch = dcs[i].getCharacter();
int k = dcs[i].getCharNum();
@ -1482,6 +1548,14 @@ public class TibTextUtils implements THDLWylieConstants {
warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
}
// In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
// indicates a real line break.
if (!EWTSNotACIP && '\n' == ch) {
if (i > 0 && dcs[i - 1].getCharacter() == '\r')
translitBuffer.append("\r\n");
else
translitBuffer.append(ch);
}
translitBuffer.append(ch);
} else {
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);

View file

@ -994,6 +994,8 @@ private static boolean isAmbHelper(String y) {
* @return true if x + y is ambiguous in the Extended Wylie
* transliteration, false if not */
public static boolean isAmbiguousWylie(String x, String y) {
// DLC NOW: BDE vs. B+DE -- TMW->ACIP should give B+DE to be very friendly to machines.
// What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
// Some would say it doesn't matter, because that's illegal. wa
// doesn't take any prefixes. But I want even illegal stuff to
@ -1719,19 +1721,21 @@ private static String acipForGlyph(String hashKey) {
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change
* them both when you change this. */
private static String getTMWToWylieErrorString(DuffCode dc) {
return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode "
return "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert "
+ dc.toString(true)
+ " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
+ " to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>";
}
/** Builds the bracketed ACIP comment that is embedded in a converted
 *  document wherever some TMW could not be transcribed in ACIP.  The
 *  wording of this message is meant to be documented in
 *  www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET), so change
 *  them both when you change this.
 *  @param it a description of the thing that could not be converted;
 *  it is spliced into the message verbatim
 *  @return the complete error string, starting with "[#" and ending
 *  with "]" */
static String getTMWToACIPErrorString(String it) {
    StringBuffer message
        = new StringBuffer("[# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert ");
    message.append(it);
    message.append(" to ACIP. Please transcribe this yourself.]");
    return message.toString();
}
private static String getTMWToACIPErrorString(DuffCode dc) {
return "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode "
+ dc.toString(true)
+ " to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
return getTMWToACIPErrorString(dc.toString(true));
}
/**

View file

@ -446,12 +446,18 @@ public class ACIPConverter {
if (!lastGuyWasNonPunct
|| (null != lastGuy
&& (lpl = lastGuy.get(lastGuy.size() - 1)).size() == 1
&& lpl.get(0).getLeft().equals("G")
&& // it's (G . anything)
// followed by some number
// of spaces (at least one,
// this one) and then a
// comma:
// "GU ," and "KU ," each have
// tshegs, but "GI ," and "KI
// ," each have a Tibetan
// space.
&& ((lpl.get(0).getLeft().equals("G")
|| lpl.get(0).getLeft().equals("K"))
&& (lpl.get(0).getRight().indexOf('U') < 0))
&&
// it's (G . anything)
// followed by some number of
// spaces (at least one, this
// one) and then a comma:
peekaheadFindsSpacesAndComma(scan, i+1))) {
if (null != writer) {
unicode = " ";

View file

@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;
import java.util.List;
import org.thdl.tib.text.DuffCode;
@ -30,7 +31,7 @@ import org.thdl.tib.text.TibTextUtils;
/** Canonizes some facts regarding the ACIP transcription system.
* @author David Chandler */
class ACIPRules {
public class ACIPRules {
/** {Ksh}, the longest consonant, has 3 characters, so this is
* three. */
public static int MAX_CONSONANT_LENGTH = 3;
@ -66,7 +67,7 @@ class ACIPRules {
// DLC I'm on my own with 'O and 'E and 'OO and 'EE, but
// GANG'O appears and I wonder... so here they are. It's
// consistent with 'I and 'A and 'U, at least: all the vowels
// may appear as K'vowel.
// may appear as K'vowel. DLC FIXME: ask.
acipVowels.add(baseVowels[i][0]);
acipVowels.add('\'' + baseVowels[i][0]);
@ -140,6 +141,43 @@ class ACIPRules {
return consonants.contains(acip);
}
private static HashMap wylieToACIP = null;
/** Translates the THDL Extended Wylie <em>atom</em> EWTS into ACIP.
    A direct table hit is returned as is.  Otherwise EWTS is split on
    '-' and '+' (the delimiters are kept and copied through
    unchanged) and each remaining piece is looked up on its own.
    @param EWTS the Extended Wylie to translate
    @return the ACIP transliteration, or null if EWTS (or any one of
    its pieces) is not recognized */
public static String getACIPForEWTS(String EWTS) {
    // Call each ACIP->EWTS getter once, ignoring the result.
    // NOTE(review): presumably this makes the getters build their
    // tables, which is what fills wylieToACIP -- confirm.
    getWylieForACIPConsonant(null);
    getWylieForACIPOther(null);
    getWylieForACIPVowel(null);

    String direct = (String)wylieToACIP.get(EWTS);
    if (null != direct)
        return direct;

    // No direct hit: translate piece by piece.
    StringBuffer acip = new StringBuffer(EWTS.length());
    StringTokenizer pieces = new StringTokenizer(EWTS, "-+", true);
    while (pieces.hasMoreTokens()) {
        String piece = pieces.nextToken();
        boolean isDelimiter = piece.equals("-") || piece.equals("+");
        String mapped = isDelimiter ? piece : (String)wylieToACIP.get(piece);
        if (null == mapped) return null;
        acip.append(mapped);
    }
    return acip.toString();
}
/** Records one ACIP/EWTS pair in both directions: ACIP-&gt;EWTS goes
    into toWylie, and EWTS-&gt;ACIP goes into {@link #wylieToACIP},
    which is created (and seeded with the "_" oddball) the first time
    it is needed.  A later call with the same EWTS replaces the
    earlier reverse mapping.
    @param toWylie the forward table to add to
    @param ACIP the ACIP transliteration
    @param EWTS the corresponding Extended Wylie */
private static void putMapping(HashMap toWylie, String ACIP, String EWTS) {
    toWylie.put(ACIP, EWTS);

    HashMap reverse = wylieToACIP;
    if (null == reverse) {
        reverse = new HashMap(75);
        reverse.put("_", " "); // oddball.
        wylieToACIP = reverse;
    }
    reverse.put(EWTS, ACIP);
}
private static HashMap acipConsonant2wylie = null;
/** Returns the EWTS corresponding to the given ACIP consonant
* (without the "A" vowel). Returns null if there is no such
@ -149,52 +187,52 @@ class ACIPRules {
acipConsonant2wylie = new HashMap(37);
// oddball:
acipConsonant2wylie.put("V", "w");
putMapping(acipConsonant2wylie, "V", "w");
// more oddballs:
acipConsonant2wylie.put("DH", "d+h");
acipConsonant2wylie.put("BH", "b+h");
acipConsonant2wylie.put("dH", "D+h");
acipConsonant2wylie.put("DZH", "dz+h");
acipConsonant2wylie.put("Ksh", "k+Sh");
acipConsonant2wylie.put("GH", "g+h");
putMapping(acipConsonant2wylie, "DH", "d+h");
putMapping(acipConsonant2wylie, "BH", "b+h");
putMapping(acipConsonant2wylie, "dH", "D+h");
putMapping(acipConsonant2wylie, "DZH", "dz+h");
putMapping(acipConsonant2wylie, "Ksh", "k+Sh");
putMapping(acipConsonant2wylie, "GH", "g+h");
acipConsonant2wylie.put("K", "k");
acipConsonant2wylie.put("KH", "kh");
acipConsonant2wylie.put("G", "g");
acipConsonant2wylie.put("NG", "ng");
acipConsonant2wylie.put("C", "c");
acipConsonant2wylie.put("CH", "ch");
acipConsonant2wylie.put("J", "j");
acipConsonant2wylie.put("NY", "ny");
acipConsonant2wylie.put("T", "t");
acipConsonant2wylie.put("TH", "th");
acipConsonant2wylie.put("D", "d");
acipConsonant2wylie.put("N", "n");
acipConsonant2wylie.put("P", "p");
acipConsonant2wylie.put("PH", "ph");
acipConsonant2wylie.put("B", "b");
acipConsonant2wylie.put("M", "m");
acipConsonant2wylie.put("TZ", "ts");
acipConsonant2wylie.put("TS", "tsh");
acipConsonant2wylie.put("DZ", "dz");
acipConsonant2wylie.put("W", "w");
acipConsonant2wylie.put("ZH", "zh");
acipConsonant2wylie.put("Z", "z");
acipConsonant2wylie.put("'", "'");
acipConsonant2wylie.put("Y", "y");
acipConsonant2wylie.put("R", "r");
acipConsonant2wylie.put("L", "l");
acipConsonant2wylie.put("SH", "sh");
acipConsonant2wylie.put("S", "s");
acipConsonant2wylie.put("H", "h");
acipConsonant2wylie.put("A", "a");
acipConsonant2wylie.put("t", "T");
acipConsonant2wylie.put("th", "Th");
acipConsonant2wylie.put("d", "D");
acipConsonant2wylie.put("n", "N");
acipConsonant2wylie.put("sh", "Sh");
putMapping(acipConsonant2wylie, "K", "k");
putMapping(acipConsonant2wylie, "KH", "kh");
putMapping(acipConsonant2wylie, "G", "g");
putMapping(acipConsonant2wylie, "NG", "ng");
putMapping(acipConsonant2wylie, "C", "c");
putMapping(acipConsonant2wylie, "CH", "ch");
putMapping(acipConsonant2wylie, "J", "j");
putMapping(acipConsonant2wylie, "NY", "ny");
putMapping(acipConsonant2wylie, "T", "t");
putMapping(acipConsonant2wylie, "TH", "th");
putMapping(acipConsonant2wylie, "D", "d");
putMapping(acipConsonant2wylie, "N", "n");
putMapping(acipConsonant2wylie, "P", "p");
putMapping(acipConsonant2wylie, "PH", "ph");
putMapping(acipConsonant2wylie, "B", "b");
putMapping(acipConsonant2wylie, "M", "m");
putMapping(acipConsonant2wylie, "TZ", "ts");
putMapping(acipConsonant2wylie, "TS", "tsh");
putMapping(acipConsonant2wylie, "DZ", "dz");
putMapping(acipConsonant2wylie, "W", "w");
putMapping(acipConsonant2wylie, "ZH", "zh");
putMapping(acipConsonant2wylie, "Z", "z");
putMapping(acipConsonant2wylie, "'", "'");
putMapping(acipConsonant2wylie, "Y", "y");
putMapping(acipConsonant2wylie, "R", "r");
putMapping(acipConsonant2wylie, "L", "l");
putMapping(acipConsonant2wylie, "SH", "sh");
putMapping(acipConsonant2wylie, "S", "s");
putMapping(acipConsonant2wylie, "H", "h");
putMapping(acipConsonant2wylie, "A", "a");
putMapping(acipConsonant2wylie, "t", "T");
putMapping(acipConsonant2wylie, "th", "Th");
putMapping(acipConsonant2wylie, "d", "D");
putMapping(acipConsonant2wylie, "n", "N");
putMapping(acipConsonant2wylie, "sh", "Sh");
}
return (String)acipConsonant2wylie.get(acip);
}
@ -207,14 +245,14 @@ class ACIPRules {
acipVowel2wylie = new HashMap(baseVowels.length * 4);
for (int i = 0; i < baseVowels.length; i++) {
acipVowel2wylie.put(baseVowels[i][0], baseVowels[i][1]);
acipVowel2wylie.put('\'' + baseVowels[i][0], baseVowels[i][2]);
acipVowel2wylie.put(baseVowels[i][0] + 'm', baseVowels[i][1] + 'M');
acipVowel2wylie.put('\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M');
acipVowel2wylie.put(baseVowels[i][0] + ':', baseVowels[i][1] + 'H');
acipVowel2wylie.put('\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H');
acipVowel2wylie.put(baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
acipVowel2wylie.put('\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
putMapping(acipVowel2wylie, baseVowels[i][0], baseVowels[i][1]);
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0], baseVowels[i][2]);
putMapping(acipVowel2wylie, baseVowels[i][0] + 'm', baseVowels[i][1] + 'M');
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M');
putMapping(acipVowel2wylie, baseVowels[i][0] + ':', baseVowels[i][1] + 'H');
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H');
putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
}
}
return (String)acipVowel2wylie.get(acip);
@ -228,27 +266,27 @@ class ACIPRules {
acipOther2wylie = new HashMap(20);
// DLC FIXME: check all these again.
acipOther2wylie.put(",", "/");
acipOther2wylie.put(" ", " ");
acipOther2wylie.put(".", "*");
acipOther2wylie.put("|", "|");
acipOther2wylie.put("`", "!");
acipOther2wylie.put(";", ";");
acipOther2wylie.put("*", "@");
acipOther2wylie.put("#", "@#");
acipOther2wylie.put("%", "~X");
acipOther2wylie.put("&", "&");
putMapping(acipOther2wylie, ",", "/");
putMapping(acipOther2wylie, " ", " ");
putMapping(acipOther2wylie, ".", "*");
putMapping(acipOther2wylie, "|", "|");
putMapping(acipOther2wylie, "`", "!");
putMapping(acipOther2wylie, ";", ";");
putMapping(acipOther2wylie, "*", "@");
putMapping(acipOther2wylie, "#", "@#");
putMapping(acipOther2wylie, "%", "~X");
putMapping(acipOther2wylie, "&", "&");
acipOther2wylie.put("0", "0");
acipOther2wylie.put("1", "1");
acipOther2wylie.put("2", "2");
acipOther2wylie.put("3", "3");
acipOther2wylie.put("4", "4");
acipOther2wylie.put("5", "5");
acipOther2wylie.put("6", "6");
acipOther2wylie.put("7", "7");
acipOther2wylie.put("8", "8");
acipOther2wylie.put("9", "9");
putMapping(acipOther2wylie, "0", "0");
putMapping(acipOther2wylie, "1", "1");
putMapping(acipOther2wylie, "2", "2");
putMapping(acipOther2wylie, "3", "3");
putMapping(acipOther2wylie, "4", "4");
putMapping(acipOther2wylie, "5", "5");
putMapping(acipOther2wylie, "6", "6");
putMapping(acipOther2wylie, "7", "7");
putMapping(acipOther2wylie, "8", "8");
putMapping(acipOther2wylie, "9", "9");
}
return (String)acipOther2wylie.get(acip);
}
@ -465,39 +503,52 @@ class ACIPRules {
/** Gets the duffcodes for vowel, such that they look good with
* the stack with hash key hashKey, and appends them to r. */
static void getDuffForACIPVowel(ArrayList r, DuffCode preceding, String vowel) {
static void getDuffForACIPVowel(ArrayList duff, DuffCode preceding, String vowel) {
if (null == vowel) return;
if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion! Use assert.
throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly.");
// Order matters here.
boolean context_added[] = new boolean[] { false };
if (vowel.startsWith("A")) {
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.WYLIE_aVOWEL);
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
} else if (vowel.indexOf("'U") >= 0) {
TibTextUtils.getVowel(r, preceding, "U");
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
} else if (vowel.indexOf("'I") >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
} else {
if (vowel.indexOf('\'') >= 0)
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.A_VOWEL);
if (vowel.indexOf("EE") >= 0)
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.ai_VOWEL);
else if (vowel.indexOf('E') >= 0)
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.e_VOWEL);
if (vowel.indexOf("OO") >= 0)
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.au_VOWEL);
else if (vowel.indexOf('O') >= 0)
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.o_VOWEL);
if (vowel.indexOf('I') >= 0)
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.i_VOWEL);
if (vowel.indexOf('U') >= 0)
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.u_VOWEL);
if (vowel.indexOf('i') >= 0)
TibTextUtils.getVowel(r, preceding, THDLWylieConstants.reverse_i_VOWEL);
if (vowel.indexOf('\'') >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
}
if (vowel.indexOf("EE") >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
} else if (vowel.indexOf('E') >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
}
if (vowel.indexOf("OO") >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
} else if (vowel.indexOf('O') >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
}
if (vowel.indexOf('I') >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
}
if (vowel.indexOf('U') >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
}
if (vowel.indexOf('i') >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
}
}
// DLC FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
if (vowel.indexOf('m') >= 0)
r.add(TibetanMachineWeb.getGlyph("M"));
if (vowel.indexOf('m') >= 0) {
DuffCode last = (DuffCode)duff.get(duff.size() - 1);
duff.remove(duff.size() - 1);
TibTextUtils.getBindu(duff, last);
}
if (vowel.indexOf(':') >= 0)
r.add(TibetanMachineWeb.getGlyph("H"));
duff.add(TibetanMachineWeb.getGlyph("H"));
}
}

View file

@ -1,3 +1,4 @@
// DLC NOW: KAsh ->Ksh here! optionally!
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance

View file

@ -340,6 +340,22 @@ tstHelper("KA'", "[(K . A), (' . )]",
new String[] { },
"{G+G}{YE}{S}");
// DLC FIXME: warn about BDE vs. B+DE. color such differently. Maybe an inputter saw B+DE and typed in BDE, not thinking.
tstHelper("BDE", "{B}{DE}",
new String[] { "{B}{DE}", "{B+DE}" },
new String[] { "{B}{DE}" },
"{B}{DE}");
tstHelper("SHR'I", "{SH}{R'I}",
null,
null,
"{SH+R'I}");
// DLC FIXME: test EWTS {pouM}
// DLC FIXME: do TMW->ACIP->TMW->ACIP round-trip.
tstHelper("DRUG", "{D}{RU}{G}",
new String[] { "{D}{RU}{G}", "{D+RU}{G}" },
new String[] { "{D+RU}{G}" },
@ -7302,6 +7318,7 @@ tstHelper("ZUR");
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
uhelp("*#HUm: K+DHA GRO`;.,",
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") K+DHA IS ESSENTIALLY NOTHING.]\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
// DLC FIXME: the file ACIP_SHRI should be made into an ACIP->TMW automated test case
}
/** Tests some more tsheg bars, these from Dr. Lacey's critical

View file

@ -197,32 +197,42 @@ class TParseTree {
* stack can take every prefix, which is not the case in
* reality */
public TStackListList getUniqueParse(boolean noPrefixTests) {
// NOTE(review): this span reads like an unresolved diff -- the lines
// that use BoolPair/bpa/allLegalParses sit right next to their
// BoolTriple/bt/allNonillegalParses counterparts.  The comments below
// describe the BoolTriple variant; confirm which lines the real file
// keeps.
//
// Selection order, as coded below:
//   1. exactly one legal parse with a vowel on the root -> that parse;
//   2. exactly one legal parse that is not Sanskrit-only -> that parse;
//   3. exactly two vowel-on-root parses -> the second of the two;
//   4. exactly two legal parses of any kind -> the second of the two;
//   5. otherwise every legal parse that was collected (possibly none).
TStackListList allLegalParses = new TStackListList(2); // save memory
// For Sanskrit+Tibetan:
TStackListList allNonillegalParses = new TStackListList(2); // save memory
// For Tibetan only:
TStackListList allStrictlyLegalParses = new TStackListList(2); // save memory
TStackListList legalParsesWithVowelOnRoot = new TStackListList(1);
// Classify every candidate parse; a single parse may land in several
// of the lists above.
ParseIterator pi = getParseIterator();
while (pi.hasNext()) {
TStackList sl = pi.next();
BoolPair bpa = sl.isLegalTshegBar(noPrefixTests);
if (bpa.isLegal) {
if (bpa.isLegalAndHasAVowelOnRoot)
BoolTriple bt = sl.isLegalTshegBar(noPrefixTests);
if (bt.isLegal) {
if (bt.isLegalAndHasAVowelOnRoot)
legalParsesWithVowelOnRoot.add(sl);
allLegalParses.add(sl);
if (!bt.isLegalButSanskrit)
allStrictlyLegalParses.add(sl);
allNonillegalParses.add(sl);
}
}
// A lone parse with a vowel on the root wins outright.
if (legalParsesWithVowelOnRoot.size() == 1)
return legalParsesWithVowelOnRoot;
else {
// Next preference: a lone parse that is legal without being flagged
// as Sanskrit.
if (allStrictlyLegalParses.size() == 1)
return allStrictlyLegalParses;
// More than two such parses is not expected; fail loudly rather
// than pick one arbitrarily.
if (allStrictlyLegalParses.size() > 2)
throw new Error("can this happen?");
// The only two-way ambiguity anticipated here is the G-YA vs. GYA
// kind, where the first parse has exactly one more element than the
// second.  Anything else is reported as an Error; otherwise the
// second (shorter) parse is returned.
if (legalParsesWithVowelOnRoot.size() == 2) {
if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size())
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1));
return new TStackListList(legalParsesWithVowelOnRoot.get(1));
}
// Same two-way check, applied to every legal parse regardless of
// vowel placement.
if (allLegalParses.size() == 2) {
if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size())
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1));
return new TStackListList(allLegalParses.get(1));
if (allNonillegalParses.size() == 2) {
if (allNonillegalParses.get(0).size() != 1 + allNonillegalParses.get(1).size())
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allNonillegalParses.get(0) + " ;; " + allNonillegalParses.get(1));
return new TStackListList(allNonillegalParses.get(1));
}
// No unique answer: hand back everything that was legal and let the
// caller decide.
return allLegalParses;
return allNonillegalParses;
}
}

View file

@ -121,16 +121,16 @@ class TStackList {
* happen. */
public ListIterator listIterator() { return al.listIterator(); }
/** Returns a pair with {@link BoolPair#isLegal} true if and only
* if this list of stacks is a legal tsheg bar by the rules of
* Tibetan syntax (sometimes called rules of spelling). If this
* is legal, then {@link BoolPair#isLegalAndHasAVowelOnRoot} will
* be true if and only if there is an explicit {A} vowel on the
* root stack.
/** Returns a triple with {@link BoolTriple#isLegal} true if and
* only if this list of stacks is a legal tsheg bar by the rules
* of Tibetan syntax (sometimes called rules of spelling).  If
* this is legal, then {@link
* BoolTriple#isLegalAndHasAVowelOnRoot} will be true if and only
* if there is an explicit {A} vowel on the root stack.  {@link
* BoolTriple#isLegalButSanskrit} is true when the tsheg bar was
* classified as a single Sanskrit stack, such as B+DE.
* @param noPrefixTests true if you want to pretend that every
* stack can take every prefix, which is not the case in
* reality */
public BoolPair isLegalTshegBar(boolean noPrefixTests) {
public BoolTriple isLegalTshegBar(boolean noPrefixTests) {
// DLC handle PADMA and other Tibetanized Sanskrit fellows consistently. Right now we only treat single-stack Sanskrit guys as legal.
TTGCList tgcList = new TTGCList(this);
@ -162,7 +162,9 @@ class TStackList {
}
}
}
return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot);
return new BoolTriple(isLegal,
(candidateType == "single-sanskrit-gc"),
isLegalAndHasAVowelOnRoot);
}
private static final boolean ddebug = false;
@ -232,11 +234,15 @@ class TStackList {
}
/** Plain holder for the three flags produced by {@link
 *  TStackList#isLegalTshegBar(boolean)}.  The fields are assigned
 *  once, in the constructor, and read directly by callers.
 *  NOTE(review): the BoolPair lines below look like the pre-change
 *  half of a diff sitting next to their BoolTriple replacements;
 *  confirm which declarations the real file keeps. */
class BoolPair {
class BoolTriple {
// True if the list of stacks is a legal tsheg bar.
boolean isLegal;
boolean isLegalButSanskrit; // legal, but only as Sanskrit -- the single Sanskrit stacks are this way, such as B+DE.
// True if there is an explicit {A} vowel on the root stack of a
// legal tsheg bar.
boolean isLegalAndHasAVowelOnRoot;
BoolPair(boolean isLegal, boolean isLegalAndHasAVowelOnRoot) {
BoolTriple(boolean isLegal,
boolean isLegalButSanskrit,
boolean isLegalAndHasAVowelOnRoot) {
this.isLegal = isLegal;
this.isLegalButSanskrit = isLegalButSanskrit;
this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot;
}
}