diff --git a/source/org/thdl/tib/scanner/LocalTibetanScanner.java b/source/org/thdl/tib/scanner/LocalTibetanScanner.java index c7895e2..1fa3df6 100644 --- a/source/org/thdl/tib/scanner/LocalTibetanScanner.java +++ b/source/org/thdl/tib/scanner/LocalTibetanScanner.java @@ -1,44 +1,44 @@ /* -The contents of this file are subject to the AMP Open Community License -Version 1.0 (the "License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License on the AMP web site -(http://www.tibet.iteso.mx/Guatemala/). - -Software distributed under the License is distributed on an "AS IS" basis, -WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific terms governing rights and limitations under the -License. - -The Initial Developer of this software is Andres Montano Pellegrini. Portions -created by Andres Montano Pellegrini are Copyright 2001 Andres Montano -Pellegrini. All Rights Reserved. - -Contributor(s): ______________________________________. -*/ + The contents of this file are subject to the AMP Open Community License + Version 1.0 (the "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License on the AMP web site + (http://www.tibet.iteso.mx/Guatemala/). + + Software distributed under the License is distributed on an "AS IS" basis, + WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific terms governing rights and limitations under the + License. + + The Initial Developer of this software is Andres Montano Pellegrini. Portions + created by Andres Montano Pellegrini are Copyright 2001 Andres Montano + Pellegrini. All Rights Reserved. + + Contributor(s): ______________________________________. + */ package org.thdl.tib.scanner; import java.util.Enumeration; import java.util.Vector; /** Loads dictionary stored in tree format and searches for words recursively. - How the the dictionary is loaded depends on which implementation of - {@link SyllableListTree} is invoked. - - @author Andrés Montano Pellegrini - @see SyllableListTree -*/ + How the the dictionary is loaded depends on which implementation of + {@link SyllableListTree} is invoked. + + @author Andrés Montano Pellegrini + @see SyllableListTree + */ public class LocalTibetanScanner extends TibetanScanner { public static String archivo; private SyllableListTree raiz, silActual, lastCompSil, silAnterior; private String wordActual, lastCompWord; private Vector floatingSil; - + static { archivo = null; } - + public BitDictionarySource getDictionarySource() { return raiz.getDictionarySourcesWanted(); @@ -46,12 +46,12 @@ public class LocalTibetanScanner extends TibetanScanner public LocalTibetanScanner(String arch) throws Exception { - this (arch, true); + this (arch, true); } - + public LocalTibetanScanner(String arch, boolean backwardCompatible) throws Exception { - super(); + super(); archivo = arch; // raiz = new MemorySyllableListTree(archivo); // raiz = new FileSyllableListTree(archivo); @@ -59,13 +59,13 @@ public class LocalTibetanScanner extends TibetanScanner floatingSil = new Vector(); resetAll(); } - + private void resetAll() { silAnterior = silActual = lastCompSil = null; wordActual = lastCompWord = null; } - + private void scanSyllable(String sil) { SyllableListTree resultado=null; @@ -73,13 +73,13 @@ public class LocalTibetanScanner extends TibetanScanner Word w; String silSinDec; boolean aadded; - + if (silActual==null) silActual = raiz; - + silAnterior = silActual; silActual = silActual.lookUp(sil); - + if (silActual != null) { if (silActual.hasDef()) @@ -113,9 +113,9 @@ public class LocalTibetanScanner extends TibetanScanner } else { - resultado = null; - if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); - silSinDec = withOutDec(silSinDec); + resultado = null; + if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); + silSinDec = withOutDec(silSinDec); } } if (resultado!=null) return; @@ -134,9 +134,9 @@ public class LocalTibetanScanner extends TibetanScanner { resultado = silAnterior.lookUp(silSinDec); /* here we don't have to worry about being in the middle of a - word since the declension marks that it is the end of a - word. - */ + word since the declension marks that it is the end of a + word. + */ if (resultado == null || !resultado.hasDef()) { silSinDec += "\'"; @@ -153,26 +153,26 @@ public class LocalTibetanScanner extends TibetanScanner } else { - resultado = null; - if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); - silSinDec = withOutDec(silSinDec); + resultado = null; + if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); + silSinDec = withOutDec(silSinDec); } - + } if (resultado!=null) return; if (lastCompSil!=null) { - if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs()); + if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs()); else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs()); wordList.addLast(w); this.resetAll(); - + enumeration = floatingSil.elements(); floatingSil = new Vector(); while (enumeration.hasMoreElements()) scanSyllable((String)enumeration.nextElement()); - + scanSyllable(sil); } else @@ -193,32 +193,32 @@ public class LocalTibetanScanner extends TibetanScanner } } } - + public void finishUp() { Enumeration enumeration; Word w; - + while (lastCompSil!=null) { if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs()); else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs()); wordList.addLast(w); this.resetAll(); - + enumeration = floatingSil.elements(); floatingSil = new Vector(); while (enumeration.hasMoreElements()) scanSyllable((String)enumeration.nextElement()); } - + if (silActual!=null) { wordList.addLast(new Word(wordActual, "[incomplete word]")); this.resetAll(); } } - + private static String concatWithSpace(String s1, String s2) { if (s1==null || s1.equals("")) @@ -226,14 +226,14 @@ public class LocalTibetanScanner extends TibetanScanner else return s1 + ' ' + s2; } - + private static String withOutDec(String sil) { boolean isDeclined =false; int len = sil.length(), apos; - + if (len<3) return null; - + char lastCar = Character.toLowerCase(sil.charAt(len-1)); if ((lastCar == 's' || lastCar == 'r') && Manipulate.isVowel(sil.charAt(len-2))) { @@ -242,19 +242,19 @@ public class LocalTibetanScanner extends TibetanScanner } else { - apos = sil.lastIndexOf('\''); - if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u') - { - isDeclined=true; - sil = sil.substring(0, apos); - } - /* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'') - { - isDeclined=true; - sil = sil.substring(0, len-2); - }*/ + apos = sil.lastIndexOf('\''); + if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u') + { + isDeclined=true; + sil = sil.substring(0, apos); + } + /* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'') + { + isDeclined=true; + sil = sil.substring(0, len-2); + }*/ } - + if (!isDeclined) return null; return sil; } @@ -262,127 +262,127 @@ public class LocalTibetanScanner extends TibetanScanner public void scanBody(String in) { boolean hayMasLineas=true; - + if (in.equals("")) finishUp(); else { int init = 0, fin; String linea; - + while (hayMasLineas) { fin = in.indexOf("\n",init); if (fin<0) { - linea = in.substring(init).trim(); - hayMasLineas=false; + linea = in.substring(init).trim(); + hayMasLineas=false; } else - linea = in.substring(init, fin).trim(); - + linea = in.substring(init, fin).trim(); + if (linea.equals("")) { - finishUp(); - wordList.addLast(new PunctuationMark('\n')); + finishUp(); + wordList.addLast(new PunctuationMark('\n')); } else - scanLine(linea); - + scanLine(linea); + init = fin+1; } } } - + public void scanLine(String linea) { int init = 0, fin; char ch; String sil; boolean doNotFinishUp; - + if (linea.equals("")) { - finishUp(); - wordList.addLast(new PunctuationMark('\n')); - return; + finishUp(); + wordList.addLast(new PunctuationMark('\n')); + return; } - -outAHere: - while(true) - { - doNotFinishUp=true; - - // Make init skip all punctuation marks - while (true) + + outAHere: + while(true) { - if (init>=linea.length()) - break outAHere; - ch = linea.charAt(init); - if (Manipulate.isPunctuationMark(ch)) - { - if (doNotFinishUp) - { - finishUp(); - doNotFinishUp=false; - } - wordList.addLast(new PunctuationMark(ch)); - } - else if (!Manipulate.isEndOfSyllableMark(ch)) - break; - - init++; - } - - doNotFinishUp = true; - - /* move fin to the end of the next syllable. If finishing - up is necessary it is done after scanSyllable - */ - - fin = init+1; - while (fin < linea.length()) - { - ch = linea.charAt(fin); - if (Manipulate.isPunctuationMark(ch)) - { - doNotFinishUp = false; - break; - } - else if (Manipulate.isEndOfSyllableMark(ch)) - { - break; - } - else - { - fin++; - if (fin>=linea.length()) - break; + doNotFinishUp=true; + + // Make init skip all punctuation marks + while (true) + { + if (init>=linea.length()) + break outAHere; + ch = linea.charAt(init); + if (Manipulate.isPunctuationMark(ch)) + { + if (doNotFinishUp) + { + finishUp(); + doNotFinishUp=false; + } + wordList.addLast(new PunctuationMark(ch)); + } + else if (!Manipulate.isEndOfSyllableMark(ch)) + break; + + init++; } + + doNotFinishUp = true; + + /* move fin to the end of the next syllable. If finishing + up is necessary it is done after scanSyllable + */ + + fin = init+1; + while (fin < linea.length()) + { + ch = linea.charAt(fin); + if (Manipulate.isPunctuationMark(ch)) + { + doNotFinishUp = false; + break; + } + else if (Manipulate.isEndOfSyllableMark(ch)) + { + break; + } + else + { + fin++; + if (fin>=linea.length()) + break; + } + } + + sil = linea.substring(init, fin); + scanSyllable(sil); + + if (!doNotFinishUp) + { + finishUp(); + wordList.addLast(new PunctuationMark(ch)); + } + init = fin+1; } - - sil = linea.substring(init, fin); - scanSyllable(sil); - - if (!doNotFinishUp) - { - finishUp(); - wordList.addLast(new PunctuationMark(ch)); - } - init = fin+1; - } - } - - /** Looks for .dic file, and returns the dictionary descriptions. - Also updates the definitionTags in the Definitions class. - */ - public String[] getDictionaryDescriptions() - { - return FileSyllableListTree.getDictionaryDescriptions(archivo); } - public void destroy() - { - FileSyllableListTree.closeFiles(); - } - + /** Looks for .dic file, and returns the dictionary descriptions. + Also updates the definitionTags in the Definitions class. + */ + public String[] getDictionaryDescriptions() + { + return FileSyllableListTree.getDictionaryDescriptions(archivo); + } + + public void destroy() + { + FileSyllableListTree.closeFiles(); + } + } \ No newline at end of file diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java index a00fe21..f5f1574 100644 --- a/source/org/thdl/tib/scanner/Manipulate.java +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -17,6 +17,10 @@ Contributor(s): ______________________________________. */ package org.thdl.tib.scanner; +import org.thdl.tib.text.*; +import org.thdl.tib.text.reverter.*; + + /** Miscelaneous static methods for the manipulation of Tibetan text. @author Andrés Montano Pellegrini @@ -24,7 +28,6 @@ package org.thdl.tib.scanner; public class Manipulate { - private static String endOfParagraphMarks = "/;|!:^@#$%="; private static String bracketMarks = "<>(){}[]"; private static String endOfSyllableMarks = " _\t"; @@ -177,70 +180,7 @@ public class Manipulate { ch = Character.toLowerCase(ch); return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; - } - - public static String wylieToAcip(String palabra) - { - // DLC FIXME: for unknown things, return null. - if (palabra.equals("@##")) return "#"; - if (palabra.equals("@#")) return "*"; - if (palabra.equals("!")) return "`"; - if (palabra.equals("b+h")) return "BH"; - if (palabra.equals("d+h")) return "DH"; - if (palabra.equals("X")) return null; - if (palabra.equals("iA")) return null; - if (palabra.equals("ai")) return "EE"; - if (palabra.equals("au")) return "OO"; - if (palabra.equals("$")) return null; - if (palabra.startsWith("@") || palabra.startsWith("#")) - return null; // we can't convert this in isolation! We need context. - char []caract; - int i, j, len; - String nuevaPalabra; - - caract = palabra.toCharArray(); - len = palabra.length(); - for (j=0; j0) - { - i--; - break; - } - default:*/ - if (Character.isLowerCase(caract[i])) - caract[i] = Character.toUpperCase(caract[i]); - else if (Character.isUpperCase(caract[i])) - caract[i] = Character.toLowerCase(caract[i]); - /* break ciclo; - } - }*/ - } - nuevaPalabra = new String(caract); - // nuevaPalabra = palabra.toUpperCase(); - - // ahora hacer los cambios de Michael Roach - - nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); - nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); - nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); - nuevaPalabra = replace(nuevaPalabra, "a", "'A"); - nuevaPalabra = replace(nuevaPalabra, "i", "'I"); - nuevaPalabra = replace(nuevaPalabra, "u", "'U"); - nuevaPalabra = replace(nuevaPalabra, "-I", "i"); - nuevaPalabra = replace(nuevaPalabra, "/", ","); - nuevaPalabra = replace(nuevaPalabra, "_", " "); - nuevaPalabra = replace(nuevaPalabra, "|", ";"); - nuevaPalabra = fixWazur(nuevaPalabra); - return nuevaPalabra; - } + } /** If more than half of the first letters among the first are 10 characters are uppercase assume its acip */ @@ -263,125 +203,30 @@ public class Manipulate else return (letters / upperCase < 2); } - public static String acipToWylie(String linea) + public static boolean isTibetanUnicodeCharacter(char ch) { - char caract[], ch, chP, chN; - String nuevaLinea; - int i, len; - boolean open; - - caract = linea.toCharArray(); - len = linea.length(); - for (i=0; i tsh, tz -> ts, v -> w, - TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h, - aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, - ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, - a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, - /-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */ - - nuevaLinea = replace(nuevaLinea, "ts", "tq"); - nuevaLinea = replace(nuevaLinea, "tz", "ts"); - nuevaLinea = replace(nuevaLinea, "tq", "tsh"); - nuevaLinea = replace(nuevaLinea, "v", "w"); - nuevaLinea = replace(nuevaLinea, "TH", "Th"); - nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh"); - nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh"); - nuevaLinea = replace(nuevaLinea, "SH", "Sh"); - nuevaLinea = replace(nuevaLinea, ":", "H"); - nuevaLinea = replace(nuevaLinea, "NH", "NaH"); - nuevaLinea = replace(nuevaLinea, "dh", "d+h"); - nuevaLinea = replace(nuevaLinea, "gh", "g+h"); - nuevaLinea = replace(nuevaLinea, "bh", "b+h"); - nuevaLinea = replace(nuevaLinea, "dzh", "dz+h"); - nuevaLinea = replace(nuevaLinea, "aa", "a"); - nuevaLinea = replace(nuevaLinea, "ai", "i"); - nuevaLinea = replace(nuevaLinea, "aee", "ai"); - nuevaLinea = replace(nuevaLinea, "au", "u"); - nuevaLinea = replace(nuevaLinea, "aoo", "au"); - nuevaLinea = replace(nuevaLinea, "ae", "e"); - nuevaLinea = replace(nuevaLinea, "ao", "o"); - nuevaLinea = replace(nuevaLinea, "ee", "ai"); - nuevaLinea = replace(nuevaLinea, "oo", "au"); - nuevaLinea = replace(nuevaLinea, "\'I", "\'q"); - nuevaLinea = replace(nuevaLinea, "I", "-i"); - nuevaLinea = replace(nuevaLinea, "\'q", "-I"); - nuevaLinea = replace(nuevaLinea, "\\", "?"); - nuevaLinea = replace(nuevaLinea, "`", "!"); - nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); - nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); - nuevaLinea = replace(nuevaLinea, "na-y", "n+y"); - - len = nuevaLinea.length(); - for (i=0; i0 && i1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2))))) - { - nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2); - len-=2; - } - } - } - } - } - - open = false; - for (i=0; i=0xF00 && ch<=0xFFF; } + public static boolean guessIfUnicode(String line) + { + char ch; + int letters=0, unicode=0, i, n; + n = line.length(); + if (n>10) n = 10; + for (i=0; i tsh, tz -> ts, v -> w, + TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h, + aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, + ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, + a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, + /-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y + + nuevaLinea = replace(nuevaLinea, "ts", "tq"); + nuevaLinea = replace(nuevaLinea, "tz", "ts"); + nuevaLinea = replace(nuevaLinea, "tq", "tsh"); + nuevaLinea = replace(nuevaLinea, "v", "w"); + nuevaLinea = replace(nuevaLinea, "TH", "Th"); + nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh"); + nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh"); + nuevaLinea = replace(nuevaLinea, "SH", "Sh"); + nuevaLinea = replace(nuevaLinea, ":", "H"); + nuevaLinea = replace(nuevaLinea, "NH", "NaH"); + nuevaLinea = replace(nuevaLinea, "dh", "d+h"); + nuevaLinea = replace(nuevaLinea, "gh", "g+h"); + nuevaLinea = replace(nuevaLinea, "bh", "b+h"); + nuevaLinea = replace(nuevaLinea, "dzh", "dz+h"); + nuevaLinea = replace(nuevaLinea, "aa", "a"); + nuevaLinea = replace(nuevaLinea, "ai", "i"); + nuevaLinea = replace(nuevaLinea, "aee", "ai"); + nuevaLinea = replace(nuevaLinea, "au", "u"); + nuevaLinea = replace(nuevaLinea, "aoo", "au"); + nuevaLinea = replace(nuevaLinea, "ae", "e"); + nuevaLinea = replace(nuevaLinea, "ao", "o"); + nuevaLinea = replace(nuevaLinea, "ee", "ai"); + nuevaLinea = replace(nuevaLinea, "oo", "au"); + nuevaLinea = replace(nuevaLinea, "\'I", "\'q"); + nuevaLinea = replace(nuevaLinea, "I", "-i"); + nuevaLinea = replace(nuevaLinea, "\'q", "-I"); + nuevaLinea = replace(nuevaLinea, "\\", "?"); + nuevaLinea = replace(nuevaLinea, "`", "!"); + nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); + nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); + nuevaLinea = replace(nuevaLinea, "na-y", "n+y"); + + len = nuevaLinea.length(); + for (i=0; i0 && i1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2))))) + { + nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2); + len-=2; + } + } + } + } + } + + open = false; + for (i=0; i0) + { + i--; + break; + } + default: + if (Character.isLowerCase(caract[i])) + caract[i] = Character.toUpperCase(caract[i]); + else if (Character.isUpperCase(caract[i])) + caract[i] = Character.toLowerCase(caract[i]); + /* break ciclo; + } + } + } + nuevaPalabra = new String(caract); + // nuevaPalabra = palabra.toUpperCase(); + + // ahora hacer los cambios de Michael Roach + + nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); + nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); + nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); + nuevaPalabra = replace(nuevaPalabra, "a", "'A"); + nuevaPalabra = replace(nuevaPalabra, "i", "'I"); + nuevaPalabra = replace(nuevaPalabra, "u", "'U"); + nuevaPalabra = replace(nuevaPalabra, "-I", "i"); + nuevaPalabra = replace(nuevaPalabra, "/", ","); + nuevaPalabra = replace(nuevaPalabra, "_", " "); + nuevaPalabra = replace(nuevaPalabra, "|", ";"); + nuevaPalabra = fixWazur(nuevaPalabra); + return nuevaPalabra; */ + } + + public static String unicodeToWylie(String unicode) + { + String machineWylie; + TibetanDocument tibDoc = new TibetanDocument(); + StringBuffer errors = new StringBuffer(); + + machineWylie = Converter.convertToEwtsForComputers(unicode, errors); + try + { + TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false); + } + catch (InvalidTransliterationException e) + { + return null; + } + return tibDoc.getWylie(new boolean[] { false }); + } + + /** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */ + public static String NCR2UnicodeString(String str) + { + StringBuffer ostr = new StringBuffer(); + int i1=0; + int i2=0; + + while(i2"); out.println(""); - out.println(""); + out.println(""); if (useTHDLBanner) { - out.println(" Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool"); - out.println(" "); - out.println(" "); - out.println(" "); - } - else - out.println(" The Online Tibetan to English Translation/Dictionary Tool"); - - out.println(" "); - out.println(" "); - out.println(" "); + out.println(" Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool"); + out.println(" "); + out.println(" "); + out.println(" "); + } + else + out.println(" The Online Tibetan to English Translation/Dictionary Tool"); + + out.println(" "); + out.println(" "); + out.println(" "); + + answer = request.getParameter(scriptStr); + + /* script==null || makes default tibetan + script!=null && makes default roman + */ + wantsTibetan = (answer==null || answer.equals(tibetanStr)); + /*if (wantsTibetan) + { + out.println(""); + }*/ + out.println(""); + out.println(""); - answer = request.getParameter(scriptStr); - - /* script==null || makes default tibetan - script!=null && makes default roman - */ - wantsTibetan = (answer==null || answer.equals(tibetanStr)); - if (wantsTibetan) - { - out.println(""); - } - out.println(""); - out.println(""); - if (useTHDLBanner) { - out.println(""); - out.println("
"); - out.println("
"); - out.println("
"); - out.println("

"); - out.println(" "); - out.println(" "); - out.println(" "); - out.println("

"); - out.println("
"); - out.println("
"); - out.println("
"); - out.println(" Home > Reference > Translation Tool"); - out.println("
"); - out.println("
"); - out.println("
"); - } - - out.println("

The Online Tibetan to English Translation/Dictionary Tool

"); - - try - { - out.println(rb.getString(otherLinksProperty)); - } - catch (MissingResourceException e) - { - // do nothing - } - + out.println(""); + out.println("
"); + out.println("
"); + out.println("
"); + out.println("

"); + out.println(" "); + out.println(" "); + out.println(" "); + out.println("

"); + out.println("
"); + out.println("
"); + out.println("
"); + out.println(" Home > Reference > Translation Tool"); + out.println("
"); + out.println("
"); + out.println("
"); + } + + out.println("

The Online Tibetan to English Translation/Dictionary Tool

"); + + try + { + out.println(rb.getString(otherLinksProperty)); + } + catch (MissingResourceException e) + { + // do nothing + } + if (useTHDLBanner) { - out.println("
"); - } - else - { - out.println(""); - } - out.println(""); - out.println(" "); - out.println(" "); - out.println(" "); - out.println(" "); - out.println("
"); - out.println("

Display results in:

"); - out.println("

Tibetan script (using Tibetan Machine Web font)
"); - out.println(" Roman script

"); - + out.println(""); + } + else + { + out.println(""); + } + out.println(""); + out.println(" "); + out.println(" "); + out.println(" "); + out.println(" "); + out.println("
"); + out.println("

Display results in:

"); + out.println("

Tibetan script (using Tibetan Machine Uni font)
"); + out.println(" Roman script

"); + if (dictionaries!=null) { int i; ds = scanner.getDictionarySource(); ds.reset(); checkedDicts = new boolean[dictionaries.length]; -/* out.println(" "); - out.println("Search in dictionaries:");*/ + /* out.println(" "); + out.println("Search in dictionaries:");*/ out.println("

Search in dictionaries: "); allUnchecked=true; for (i=0; i"); out.println("

"); - out.println(""); - out.println(" "); - out.println(" "); + out.println(" "); + out.println(" "); + out.println("
"); + out.println(""); + out.println(" "); + out.println(" "); - out.println(" "); - out.println(" "); - out.println("
"); out.println("

Input text:

"); - out.println("
"); - out.println("

"); - out.println("
"); + out.println("
"); + out.println("

"); + out.println("
"); - out.println(""); - out.println("
"); + if (answer == null || answer != null && !answer.equals(clearStr)) + { + parrafo = request.getParameter("parrafo"); + if (parrafo!=null) out.print(parrafo); + } - if (parrafo != null) - { - sl.writeLog("Translation\tOnLineScannerFilter"); - if (ds!=null && !ds.isEmpty()) - desglosar(parrafo, out, wantsTibetan); - } - else sl.writeLog("Invocation\tOnLineScannerFilter"); + out.println(""); + out.println(""); + + if (parrafo != null) + { + sl.writeLog("Translation\tOnLineScannerFilter"); + if (ds!=null && !ds.isEmpty()) + desglosar(parrafo, out, wantsTibetan); + } + else sl.writeLog("Invocation\tOnLineScannerFilter"); out.println(TibetanScanner.copyrightHTML); if (useTHDLBanner) out.println("
"); - out.println(""); - out.println(""); - } + out.println(""); + out.println(""); + } - public void doPost(HttpServletRequest request, - HttpServletResponse response) - //throws IOException, ServletException - { - doGet(request, response); - } + public void doPost(HttpServletRequest request, + HttpServletResponse response) + //throws IOException, ServletException + { + doGet(request, response); + } synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan) { @@ -298,22 +300,25 @@ public class OnLineScannerFilter extends HttpServlet if (!in.equals("")) { - /* while (hayMasLineas) - { - fin = in.indexOf("\n",init); - if (fin<0) - { - linea = in.substring(init).trim(); - hayMasLineas=false; - } - else - linea = in.substring(init, fin).trim(); - - scanner.scanBody(linea); - - init = fin+1; - } */ + /* while (hayMasLineas) + { + fin = in.indexOf("\n",init); + if (fin<0) + { + linea = in.substring(init).trim(); + hayMasLineas=false; + } + else + linea = in.substring(init, fin).trim(); + + scanner.scanBody(linea); + + init = fin+1; + } */ scanner.clearTokens(); + in = Manipulate.NCR2UnicodeString(in); + if (Manipulate.guessIfUnicode(in)) in = Manipulate.unicodeToWylie(in); + else if (Manipulate.guessIfAcip(in)) in = Manipulate.acipToWylie(in); scanner.scanBody(in); scanner.finishUp(); printText(pw, tibetan); @@ -335,35 +340,35 @@ public class OnLineScannerFilter extends HttpServlet for (i=0; i < words.length; i++) { - if (words[i] instanceof Word) - { - word = new SwingWord((Word)words[i]); - // if (word.getDefs().getDictionarySource()!=null) - pw.print(word.getLink()); - // else pw.print(word.getWylie() + " "); - } - else - { - if (words[i] instanceof PunctuationMark) - { - pm = words[i].toString().charAt(0); - switch (pm) - { - case '\n': - pw.println("

"); - pw.print("

"); - break; - case '<': - pw.print("< "); - break; - case '>': - pw.print("> "); - break; - default: - pw.print(pm + " "); - } - } - } + if (words[i] instanceof Word) + { + word = new SwingWord((Word)words[i]); + // if (word.getDefs().getDictionarySource()!=null) + pw.print(word.getLink(tibetan)); + // else pw.print(word.getWylie() + " "); + } + else + { + if (words[i] instanceof PunctuationMark) + { + pm = words[i].toString().charAt(0); + switch (pm) + { + case '\n': + pw.println("

"); + pw.print("

"); + break; + case '<': + pw.print("< "); + break; + case '>': + pw.print("> "); + break; + default: + pw.print(pm + " "); + } + } + } } pw.println("

"); } @@ -376,17 +381,17 @@ public class OnLineScannerFilter extends HttpServlet String tag; DictionarySource ds; ByteDictionarySource sourceb=null; - + words = scanner.getWordArray(false); - + if (words == null) return; - + pw.println(""); - + for (j = 0; j < words.length; j++) { try { - + word = new SwingWord(words[j]); defs = word.getDefs(); ds = defs.getDictionarySource(); @@ -400,19 +405,20 @@ public class OnLineScannerFilter extends HttpServlet } else { sourceb = (ByteDictionarySource) ds; - k=0; - while (sourceb.isEmpty(k)) k++; - tag = sourceb.getTag(k); - k++; + k=0; + while (sourceb.isEmpty(k)) k++; + tag = sourceb.getTag(k); + k++; } } - pw.println(" "); + pw.print(" "); pw.println(" "); pw.println(" "); - + pw.println(" "); for (i = 1; i < defs.def.length; i++) { pw.println(" "); @@ -421,9 +427,9 @@ public class OnLineScannerFilter extends HttpServlet tag = ds.getTag(i); } else { - while (sourceb.isEmpty(k)) k++; - tag = sourceb.getTag(k); - k++; + while (sourceb.isEmpty(k)) k++; + tag = sourceb.getTag(k); + k++; } pw.println(" "); @@ -435,17 +441,17 @@ public class OnLineScannerFilter extends HttpServlet sl.writeLog("Crash\tOnLineScannerFilter\t" + word.getWylie()); sl.writeException(e); } - + } pw.println("
" + word.getBookmark(tibetan) - + "" + word.getBookmark(tibetan) + "" + tag + "" + defs.def[0] + "
" + tag + "
"); } public void destroy() { - super.destroy(); - sl.setUserIP(null); - sl.writeLog("Shutdown\tOnLineScannerFilter"); - scanner.destroy(); + super.destroy(); + sl.setUserIP(null); + sl.writeLog("Shutdown\tOnLineScannerFilter"); + scanner.destroy(); } } diff --git a/source/org/thdl/tib/scanner/StrictDuffPane.java b/source/org/thdl/tib/scanner/StrictDuffPane.java index 3ded0eb..747e469 100644 --- a/source/org/thdl/tib/scanner/StrictDuffPane.java +++ b/source/org/thdl/tib/scanner/StrictDuffPane.java @@ -1,20 +1,20 @@ /* -The contents of this file are subject to the AMP Open Community License -Version 1.0 (the "License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License on the AMP web site -(http://www.tibet.iteso.mx/Guatemala/). - -Software distributed under the License is distributed on an "AS IS" basis, -WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific terms governing rights and limitations under the -License. - -The Initial Developer of this software is Andres Montano Pellegrini. Portions -created by Andres Montano Pellegrini are Copyright 2001 Andres Montano -Pellegrini. All Rights Reserved. - -Contributor(s): ______________________________________. -*/ + The contents of this file are subject to the AMP Open Community License + Version 1.0 (the "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License on the AMP web site + (http://www.tibet.iteso.mx/Guatemala/). + + Software distributed under the License is distributed on an "AS IS" basis, + WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific terms governing rights and limitations under the + License. + + The Initial Developer of this software is Andres Montano Pellegrini. Portions + created by Andres Montano Pellegrini are Copyright 2001 Andres Montano + Pellegrini. All Rights Reserved. + + Contributor(s): ______________________________________. + */ package org.thdl.tib.scanner; @@ -29,127 +29,136 @@ import javax.swing.text.BadLocationException; import org.thdl.tib.input.DuffPane; import org.thdl.tib.text.TibetanDocument; +import org.thdl.tib.text.reverter.*; import org.thdl.util.RTFFixerInputStream; import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlOptions; /** Identical to DuffPane except that it only supports Tibetan script in - TibetanMachineWeb. No roman script can be inputted. If roman script is - pasted, it is assumed that it is either ACIP or wylie and is converted - accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted, - it is converted to TibetanMachineWeb. Any other font is assumed to be - Roman script. -*/ + TibetanMachineWeb. No roman script can be inputted. If roman script is + pasted, it is assumed that it is either ACIP or wylie and is converted + accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted, + it is converted to TibetanMachineWeb. Any other font is assumed to be + Roman script. + */ public class StrictDuffPane extends DuffPane { - public StrictDuffPane() - { - super(); - disableRoman(); - } - - /** Smart paste! Automatically recognizes what is being pasted and converts - respectively. Currently it supports pasting from TibetanMachineWeb, - TibetanMachine, wylie, and ACIP. - */ - public void paste(int offset) - { - // Respect setEditable(boolean): - if (!this.isEditable()) - return; - - try - { - Transferable contents = rtfBoard.getContents(this); - - if (contents.isDataFlavorSupported(rtfFlavor)){ - - InputStream in = (InputStream)contents.getTransferData(rtfFlavor); - int p1 = offset; - - //construct new document that contains only portion of text you want to paste - TibetanDocument sd = new TibetanDocument(); - - // I swear this happened once when I pasted in some - // random junk just after Jskad started up. - ThdlDebug.verify(null != in); - - boolean errorReading = false; - - try - { - if (!ThdlOptions.getBooleanOption("thdl.do.not.fix.rtf.hex.escapes")) - in = new RTFFixerInputStream(in); - rtfEd.read(in, sd, 0); - } catch (Exception e) { - - errorReading = true; - - /* If fonts weren't supported and we don't know what it is try to paste - ACIP or wylie. - */ - if (contents.isDataFlavorSupported(DataFlavor.stringFlavor)) - { - String data = (String)contents.getTransferData(DataFlavor.stringFlavor); - if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); - toTibetanMachineWeb(data, offset); - } - // JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop."); - } - - if (!errorReading) - { - /* If it is any font beside TibetanMachine and TibetanMachineWeb - assume it is wylie or Acip. - */ - if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine") - && contents.isDataFlavorSupported(DataFlavor.stringFlavor)) - { - String data = (String)contents.getTransferData(DataFlavor.stringFlavor); - if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); - toTibetanMachineWeb(data, offset); - } - else - { - // If it's font is TibetanMachine, convert to TibetanMachineWeb first - if (sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().equals("TibetanMachine")) - { - StringBuffer errors = new StringBuffer(); - long numAttemptedReplacements[] = new long[] { 0 }; - sd.convertToTMW(0, -1, errors, numAttemptedReplacements); - } - - for (int i=0; i" + result + " "; + return "" + result + " "; } } diff --git a/source/org/thdl/tib/scanner/TibetanScanner.java b/source/org/thdl/tib/scanner/TibetanScanner.java index d473ae8..d4d439b 100644 --- a/source/org/thdl/tib/scanner/TibetanScanner.java +++ b/source/org/thdl/tib/scanner/TibetanScanner.java @@ -27,7 +27,7 @@ import org.thdl.util.ThdlVersion; */ public abstract class TibetanScanner { - public static final String version = "The Tibetan to English Translation Tool, version 3.2.1 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; + public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved."; public static final String copyrightHTML="
" + version + "Copyright © 2000-2005 by Andrés Montano Pellegrini.
All rights reserved.
";