diff --git a/Aro phonetics scheme.txt b/Aro phonetics scheme.txt index 03486be..0cd7f30 100644 --- a/Aro phonetics scheme.txt +++ b/Aro phonetics scheme.txt @@ -96,20 +96,14 @@ x ng ; pronunciation (which may also contain spaces). A semicolon ; precedes a comment. Blank lines are OK. -ba : wa ; mind you, ba (pronounced ba) means cow. But that's much rarer than wa. -bo : wo -ba'i : wa'i -bo'i : wo'i -bar : ?bar/war? ; bar = "middle"; could be either, so supply both and let user sort it out -bor : ?bor/wor? ; bor = "cast away"; could be either, so supply both and let user sort it out -rdo rje : dorjé -mkha' 'gro : khandro -sku mnye : kumnyé -sprul sku : tulku -mtsho rgyal : tsogyèl -rta mgrin: tamdrin -dga' ldan : ganden -dge 'dun : gendün -a mdo : amdo -srid pa : sipa -pad ma : pèma \ No newline at end of file +rdo rje > dorjé +mkha' 'gro > khandro +sku mnye > kumnyé +sprul sku > tulku +mtsho rgyal > tsogyèl +rta mgrin> tamdrin +dga' ldan > ganden +dge 'dun > gendün +a mdo > amdo +srid pa > sipa +pad ma > pèma \ No newline at end of file diff --git a/Aro phonetics test cases.txt b/Aro phonetics test cases.txt index 7348d4f..5039601 100644 --- a/Aro phonetics test cases.txt +++ b/Aro phonetics test cases.txt @@ -40,7 +40,7 @@ klad pa > l glog > log le'u > lé'u pa'ang > pa'ang -ba'i > wa'i +bar ba'i > barwa'i rta mgrin > tamdrin ; Other tests, to exercise particular rules in the grammar that aren't covered in the rules above diff --git a/THDL phonetics scheme.txt b/THDL phonetics scheme.txt index fcf07a9..b94e99a 100644 --- a/THDL phonetics scheme.txt +++ b/THDL phonetics scheme.txt @@ -18,16 +18,26 @@ ; compactly. For example, it would be difficult to capture the ; effects of preinitial consonants on tone (as in the scheme used ; in Joe Wilson's book, for instance).) Also note that not even the -; whole of the present scheme is implemented using these rules. 
In -; particular, the deletion of prefix and superscript consonants, +; whole of the present scheme is implemented using these rules. For +; example, the deletion of prefix and superscript consonants, ; and of wa-zur, are done in program code, not using the rules here. +; This makes e come out é only when it is the last letter in a "word" (*not* +; syllable). Our grammar engine is not nearly powerful enough to do +; this in a clean way. + + ; Miscellaneous prefix transformations g. ; delete this (representing g prefix, used before root y only) dby y ; must come before db->w, for dbyang dbr r ; must come before db->w, for dbral db w ; must come before by->j +; Removal of confusing 'h's +th t +ph p +tsh ts + ; c and ch are both transcribed ch. To get this we need a kludge ; (involving x), because the rule c -> ch would apply recursively. ch c @@ -42,10 +52,10 @@ my ny ; Retroflexes kr tr -khr thr +khr tr gr dr pr tr -phr thr +phr tr br dr ; Other bad behavior from R @@ -55,7 +65,7 @@ sr s ; Uniquely random case zl d -; Umlaut of a, o, u followed by d, n, l, s +; Umlaut of a, o, u followed by d, n, l, s, and 'i ; Note: this must be done before suffix-stripping. ; Before actually doing the umlaut, we "hide" the n in ng, so that ng doesn't ; induce umlaut. This is gross; if we had a real grammar engine it wouldn't @@ -65,17 +75,24 @@ ad e an en al el as e +a'i e od ö on ön ol öl os ö +o'i ö ud ü un ün ul ül us ü +u'i ü ; restore ng x ng +; Stripping of 'i from e'i +; (It is stripped from a, o, u by the umlaut rules, and from i by the vowel-doubling rule.) +e'i e + ; Stripping of suffix d, s, and ' from i and e ; Note: this has already been done by the umlaut rules for some cases, ; which don't need to be repeated here. @@ -112,22 +129,27 @@ ub up ; There is one exception per line. 
Each exception consists of ; the transliteration (which may be several syllables separated -; by spaces), followed by a space, a colon, a space, and the +; by spaces), followed by a space, a greater-than, a space, and the ; pronunciation (which may also contain spaces). A semicolon ; precedes a comment. Blank lines are OK. -ba : wa ; mind you, ba (pronounced ba) means cow. But that's much rarer than wa. -bo : wo -ba'i : wai -bo'i : woi -bar : ?bar/war? ; bar = "middle"; could be either, so supply both and let user sort it out -bor : ?bor/wor? ; bor = "cast away"; could be either, so supply both and let user sort it out -rdo rje : dorje -mkha' 'gro : khandro -sprul sku : tulku -rta mgrin: tamdrin -dga' ldan : ganden -dge 'dun : gendün -a mdo : amdo -blo bzang : lobzang -sbra nag zhol : banakzhöl +mkha' 'gro > khandro +sprul sku > tulku +rta mgrin > tamdrin +dga' ldan > ganden +dge 'dun > gendün +a mdo > amdo +bka' 'gyur > kangyur +rgyu 'bras > gyundré +ngos 'dzin > ngöndzin +chab mdo > chamdo +dpal ldan > penden +dpal 'bar > pembar +rig 'dzin > rindzin +skyabs 'gro > kyamdro +'bri ru > biru +sbra nag zhol > banakzhöl +rdo rje > dorje +o rgyan > orgyen +lha rje > lharjé +rgyal rtse > gyantsé \ No newline at end of file diff --git a/THDL phonetics test cases.txt b/THDL phonetics test cases.txt index e3ad54c..4692667 100644 --- a/THDL phonetics test cases.txt +++ b/THDL phonetics test cases.txt @@ -1,72 +1,120 @@ ; -; These examples come from the draft (8/21/03) THDL Phonetics document +; These examples mostly come from the THDL Phonetics document (Jan 2004 draft) ; -lha sa > lhasa +dag pa > dakpa +ring po > ringpo +rin chen > rinchen +lab > lap +dum bu > dumbu +dmar po > marpo +ril bu > rilbu sa skya pa > sakyapa -blo bzang > lobzang +blo bzang > lozang rnying ma pa > nyingmapa -rdo rje > dorje +rdo rje > dorjé dge lugs pa > gelukpa -gzhis ka rtse > zhikatse -mar me > marme +gzhis ka rtse > zhikatsé +mar me > marmé +dge bshes > geshé bcu > chu -lce > che -rin chen 
bzang po > rinchenzangpo +gcig pa > chikpa nag chu > nakchu -bka' rgyud pa > kagyüpa +'phag pa > pakpa +gser thang > sertang +khang tshan > khangtsen +lce > ché +rin chen bzang po > rinchenzangpo +bka' rgyud > kagyü bsod nams> sönam -thub bstan > thupten +yul > yül +dus tshod > dütsö +bon po > bönpo +sde dge > degé +brgyad > gyé +dge rgan > gegen +ral pa can > relpachen +tshe ring > tsering +byes > jé +bstan 'dzin > tendzin 'jam dpal dbyangs > jampelyang dge legs > gelek kha btags > khatak +sngags pa > ngakpa +byang chub > jangchup +thub bstan > tupten +tabs > tap bka' shag > kashak sbra nag zhol > banakzhöl -thabs > thap +thabs > tap lha sa ba > lhasawa jo bo > jowo dpa' bo > pawo -spyan ras gzig > chenrezik +gsal bar > selwar +; nga'i deb > ngé dep -- can't do this one, it depends on word segmentation +bar ba > barwa +spyan ras gzig > chenrezik +phyag > chak sbyin bdag > jindak smyong > nyong +dmyal ba > nyelwa sgrol ma > drölma rten 'brel > tendrel 'bras spungs > drepung -'phrin las > thrinle -dbang > wang -dbral > rel -dbyar kha > yarkha -zla ba > dawa +'phrin las > trinlé +srung ma > sungma +rdzun smra ba > dzünmawa klad pa > lepa glog > lok +zla ba > dawa +lha sa > lhasa +lho phyogs > lhochok +lhun grub > lhündrup +dbang > wang +dbyar kha > yarkha +dbral > rel le'u > leu +khyi'u > khyiu pa'ang > pang -ba'i > wai +gri'i > dri +'gro ba'i > drowé +rgyal bu'i > gyelbü +rin po che'i > rinpoché +bdag po'i > dakpö +le'u'i > leü rta mgrin > tamdrin - -; Other tests, to exercise particular rules in the grammar that aren't covered in the rules above g.yon > yön phyag > chak bkra shis > trashi -khros ma > thröma +khros ma > tröma sprul > trül mri tam ga > mitamga srid pa > sipa pad ma > pema pan chen > penchen -ral pa can > relpachen -thun > thün +thun > tün dus gsum > düsum -sbed > be -ces > che -pa'i > pai -che'i > chei -gri'i > dri -po'i > poi -le'u'i > leui -rdzogs > dzok -thug pa > thukpa -'debs > dep +sbed > bé +ces > ché +btsan dbang > tsenwang +tshong khang > 
tsongkhang +rdzong > dzong +stabs > tap +thug pa > tukpa +debs > dep sib sib > sipsip lobs pa > loppa grub > drup kla col > lachöl +spyan snga ba > chenngawa +sems dpa'i > sempé +bon po'i > bönpö +rdzogs > dzok + +; Other random tests +phreng > treng + +; Test of second-suffix d removal. Made-up word because I don't know real ones. +rand > ren +; Test that we don't break on very short words (a single letter, or vowels only). +a > a +ai > ai diff --git a/WylieWord development copy.dot b/WylieWord development copy.dot index bb249a7..e404e06 100644 Binary files a/WylieWord development copy.dot and b/WylieWord development copy.dot differ