From 35a9869aacb457d6b811398e248214f5580fe6df Mon Sep 17 00:00:00 2001 From: amontano Date: Sun, 23 Mar 2003 20:27:54 +0000 Subject: [PATCH] 1. Fixed parsing error 2. Added support for extreme uses of 'a' like le'u'i'o 3. Now correctly parses syllables that have the particles "ang" and "am" added to them. Second works only in "Roman script" mode. The converter from Tibetan script to Roman script does not convert these combinations correctly. ("pa'ang" is converted wrongly into "pa'ng" and "pa'am" is converted wrongly into "pa'ma"). --- .../thdl/tib/scanner/LocalTibetanScanner.java | 46 +++++++++++++++---- source/org/thdl/tib/scanner/Manipulate.java | 3 +- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/source/org/thdl/tib/scanner/LocalTibetanScanner.java b/source/org/thdl/tib/scanner/LocalTibetanScanner.java index 8384917..1f3ed03 100644 --- a/source/org/thdl/tib/scanner/LocalTibetanScanner.java +++ b/source/org/thdl/tib/scanner/LocalTibetanScanner.java @@ -76,6 +76,7 @@ public class LocalTibetanScanner implements TibetanScanner Enumeration enum; Word w; String silSinDec; + boolean aadded; if (silActual==null) silActual = raiz; @@ -94,22 +95,32 @@ public class LocalTibetanScanner implements TibetanScanner else { silSinDec = withOutDec(sil); - if (silSinDec!=null) + resultado=null; + // while to take into account very weird cases like le'u'i'o + while (resultado == null && silSinDec!=null) { resultado = silAnterior.lookUp(silSinDec); if (resultado == null) { silSinDec += "\'"; resultado = silAnterior.lookUp(silSinDec); + aadded=true; } + else aadded=false; if (resultado!=null && resultado.hasDef()) { lastCompWord = concatWithSpace(wordActual, silSinDec); lastCompSil = resultado; wordActual = concatWithSpace(wordActual, sil); + floatingSil.removeAllElements(); + } + else + { + resultado = null; + if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); + silSinDec = withOutDec(silSinDec); } - else resultado = null; } if (resultado!=null) 
return; @@ -121,14 +132,18 @@ public class LocalTibetanScanner implements TibetanScanner else { silSinDec = withOutDec(sil); - if (silSinDec!=null) + resultado = null; + // while to take into account very weird cases like le'u'i'o + while (resultado==null && silSinDec!=null) { resultado = silAnterior.lookUp(silSinDec); if (resultado == null) { silSinDec += "\'"; resultado = silAnterior.lookUp(silSinDec); + aadded=true; } + else aadded=false; // si funciona sin declension arreglado problema if (resultado!=null && resultado.hasDef()) { @@ -136,7 +151,13 @@ public class LocalTibetanScanner implements TibetanScanner resetAll(); floatingSil.removeAllElements(); } - else resultado = null; + else + { + resultado = null; + if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); + silSinDec = withOutDec(silSinDec); + } + } if (resultado!=null) return; @@ -207,7 +228,7 @@ public class LocalTibetanScanner implements TibetanScanner private static String withOutDec(String sil) { boolean isDeclined =false; - int len = sil.length(); + int len = sil.length(), apos; if (len<3) return null; @@ -217,10 +238,19 @@ public class LocalTibetanScanner implements TibetanScanner isDeclined=true; sil = sil.substring(0, len-1); } - else if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'') + else { - isDeclined=true; - sil = sil.substring(0, len-2); + apos = sil.lastIndexOf('\''); + if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u') + { + isDeclined=true; + sil = sil.substring(0, apos); + } + /* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'') + { + isDeclined=true; + sil = sil.substring(0, len-2); + }*/ } if (!isDeclined) return null; diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java index 0ea21f1..11a3fae 100644 --- a/source/org/thdl/tib/scanner/Manipulate.java +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -110,7 +110,7 @@ public class 
Manipulate aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, - /-/ -> (-), ga-y -> g.y, g-y -> g.y */ + /-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */ nuevaLinea = replace(nuevaLinea, "ts", "tq"); nuevaLinea = replace(nuevaLinea, "tz", "ts"); @@ -141,6 +141,7 @@ public class Manipulate nuevaLinea = replace(nuevaLinea, "`", "!"); nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); + nuevaLinea = replace(nuevaLinea, "na-y", "n+y"); len = nuevaLinea.length(); for (i=0; i