From 67bddb7a7e6ad28f8ddbbdd0f9c107786970471d Mon Sep 17 00:00:00 2001 From: amontano Date: Mon, 24 Apr 2006 06:09:17 +0000 Subject: [PATCH] Updated the translation tool to accept Tibetan Unicode. For the application version, this means that the smart paste is unicode aware and will do the appropriate conversion. In the servlet version tibetan unicode can now be inputted in the form and tibetan machine uni is used to display the results. The Manipulate class now includes: acipToWylie, wylieToAcip, and unicodeToWylie. They provide a simple interfase to David Chandler's converters that are used by the translation tool. --- .../thdl/tib/scanner/LocalTibetanScanner.java | 308 +++++------ source/org/thdl/tib/scanner/Manipulate.java | 479 ++++++++++------- .../thdl/tib/scanner/OnLineScannerFilter.java | 494 +++++++++--------- .../org/thdl/tib/scanner/StrictDuffPane.java | 273 +++++----- source/org/thdl/tib/scanner/SwingWord.java | 12 +- .../org/thdl/tib/scanner/TibetanScanner.java | 2 +- 6 files changed, 851 insertions(+), 717 deletions(-) diff --git a/source/org/thdl/tib/scanner/LocalTibetanScanner.java b/source/org/thdl/tib/scanner/LocalTibetanScanner.java index c7895e2..1fa3df6 100644 --- a/source/org/thdl/tib/scanner/LocalTibetanScanner.java +++ b/source/org/thdl/tib/scanner/LocalTibetanScanner.java @@ -1,44 +1,44 @@ /* -The contents of this file are subject to the AMP Open Community License -Version 1.0 (the "License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License on the AMP web site -(http://www.tibet.iteso.mx/Guatemala/). - -Software distributed under the License is distributed on an "AS IS" basis, -WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific terms governing rights and limitations under the -License. - -The Initial Developer of this software is Andres Montano Pellegrini. Portions -created by Andres Montano Pellegrini are Copyright 2001 Andres Montano -Pellegrini. All Rights Reserved. - -Contributor(s): ______________________________________. -*/ + The contents of this file are subject to the AMP Open Community License + Version 1.0 (the "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License on the AMP web site + (http://www.tibet.iteso.mx/Guatemala/). + + Software distributed under the License is distributed on an "AS IS" basis, + WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific terms governing rights and limitations under the + License. + + The Initial Developer of this software is Andres Montano Pellegrini. Portions + created by Andres Montano Pellegrini are Copyright 2001 Andres Montano + Pellegrini. All Rights Reserved. + + Contributor(s): ______________________________________. + */ package org.thdl.tib.scanner; import java.util.Enumeration; import java.util.Vector; /** Loads dictionary stored in tree format and searches for words recursively. - How the the dictionary is loaded depends on which implementation of - {@link SyllableListTree} is invoked. - - @author Andrés Montano Pellegrini - @see SyllableListTree -*/ + How the the dictionary is loaded depends on which implementation of + {@link SyllableListTree} is invoked. + + @author Andrés Montano Pellegrini + @see SyllableListTree + */ public class LocalTibetanScanner extends TibetanScanner { public static String archivo; private SyllableListTree raiz, silActual, lastCompSil, silAnterior; private String wordActual, lastCompWord; private Vector floatingSil; - + static { archivo = null; } - + public BitDictionarySource getDictionarySource() { return raiz.getDictionarySourcesWanted(); @@ -46,12 +46,12 @@ public class LocalTibetanScanner extends TibetanScanner public LocalTibetanScanner(String arch) throws Exception { - this (arch, true); + this (arch, true); } - + public LocalTibetanScanner(String arch, boolean backwardCompatible) throws Exception { - super(); + super(); archivo = arch; // raiz = new MemorySyllableListTree(archivo); // raiz = new FileSyllableListTree(archivo); @@ -59,13 +59,13 @@ public class LocalTibetanScanner extends TibetanScanner floatingSil = new Vector(); resetAll(); } - + private void resetAll() { silAnterior = silActual = lastCompSil = null; wordActual = lastCompWord = null; } - + private void scanSyllable(String sil) { SyllableListTree resultado=null; @@ -73,13 +73,13 @@ public class LocalTibetanScanner extends TibetanScanner Word w; String silSinDec; boolean aadded; - + if (silActual==null) silActual = raiz; - + silAnterior = silActual; silActual = silActual.lookUp(sil); - + if (silActual != null) { if (silActual.hasDef()) @@ -113,9 +113,9 @@ public class LocalTibetanScanner extends TibetanScanner } else { - resultado = null; - if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); - silSinDec = withOutDec(silSinDec); + resultado = null; + if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); + silSinDec = withOutDec(silSinDec); } } if (resultado!=null) return; @@ -134,9 +134,9 @@ public class LocalTibetanScanner extends TibetanScanner { resultado = silAnterior.lookUp(silSinDec); /* here we don't have to worry about being in the middle of a - word since the declension marks that it is the end of a - word. - */ + word since the declension marks that it is the end of a + word. + */ if (resultado == null || !resultado.hasDef()) { silSinDec += "\'"; @@ -153,26 +153,26 @@ public class LocalTibetanScanner extends TibetanScanner } else { - resultado = null; - if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); - silSinDec = withOutDec(silSinDec); + resultado = null; + if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); + silSinDec = withOutDec(silSinDec); } - + } if (resultado!=null) return; if (lastCompSil!=null) { - if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs()); + if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs()); else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs()); wordList.addLast(w); this.resetAll(); - + enumeration = floatingSil.elements(); floatingSil = new Vector(); while (enumeration.hasMoreElements()) scanSyllable((String)enumeration.nextElement()); - + scanSyllable(sil); } else @@ -193,32 +193,32 @@ public class LocalTibetanScanner extends TibetanScanner } } } - + public void finishUp() { Enumeration enumeration; Word w; - + while (lastCompSil!=null) { if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs()); else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs()); wordList.addLast(w); this.resetAll(); - + enumeration = floatingSil.elements(); floatingSil = new Vector(); while (enumeration.hasMoreElements()) scanSyllable((String)enumeration.nextElement()); } - + if (silActual!=null) { wordList.addLast(new Word(wordActual, "[incomplete word]")); this.resetAll(); } } - + private static String concatWithSpace(String s1, String s2) { if (s1==null || s1.equals("")) @@ -226,14 +226,14 @@ public class LocalTibetanScanner extends TibetanScanner else return s1 + ' ' + s2; } - + private static String withOutDec(String sil) { boolean isDeclined =false; int len = sil.length(), apos; - + if (len<3) return null; - + char lastCar = Character.toLowerCase(sil.charAt(len-1)); if ((lastCar == 's' || lastCar == 'r') && Manipulate.isVowel(sil.charAt(len-2))) { @@ -242,19 +242,19 @@ public class LocalTibetanScanner extends TibetanScanner } else { - apos = sil.lastIndexOf('\''); - if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u') - { - isDeclined=true; - sil = sil.substring(0, apos); - } - /* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'') - { - isDeclined=true; - sil = sil.substring(0, len-2); - }*/ + apos = sil.lastIndexOf('\''); + if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u') + { + isDeclined=true; + sil = sil.substring(0, apos); + } + /* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'') + { + isDeclined=true; + sil = sil.substring(0, len-2); + }*/ } - + if (!isDeclined) return null; return sil; } @@ -262,127 +262,127 @@ public class LocalTibetanScanner extends TibetanScanner public void scanBody(String in) { boolean hayMasLineas=true; - + if (in.equals("")) finishUp(); else { int init = 0, fin; String linea; - + while (hayMasLineas) { fin = in.indexOf("\n",init); if (fin<0) { - linea = in.substring(init).trim(); - hayMasLineas=false; + linea = in.substring(init).trim(); + hayMasLineas=false; } else - linea = in.substring(init, fin).trim(); - + linea = in.substring(init, fin).trim(); + if (linea.equals("")) { - finishUp(); - wordList.addLast(new PunctuationMark('\n')); + finishUp(); + wordList.addLast(new PunctuationMark('\n')); } else - scanLine(linea); - + scanLine(linea); + init = fin+1; } } } - + public void scanLine(String linea) { int init = 0, fin; char ch; String sil; boolean doNotFinishUp; - + if (linea.equals("")) { - finishUp(); - wordList.addLast(new PunctuationMark('\n')); - return; + finishUp(); + wordList.addLast(new PunctuationMark('\n')); + return; } - -outAHere: - while(true) - { - doNotFinishUp=true; - - // Make init skip all punctuation marks - while (true) + + outAHere: + while(true) { - if (init>=linea.length()) - break outAHere; - ch = linea.charAt(init); - if (Manipulate.isPunctuationMark(ch)) - { - if (doNotFinishUp) - { - finishUp(); - doNotFinishUp=false; - } - wordList.addLast(new PunctuationMark(ch)); - } - else if (!Manipulate.isEndOfSyllableMark(ch)) - break; - - init++; - } - - doNotFinishUp = true; - - /* move fin to the end of the next syllable. If finishing - up is necessary it is done after scanSyllable - */ - - fin = init+1; - while (fin < linea.length()) - { - ch = linea.charAt(fin); - if (Manipulate.isPunctuationMark(ch)) - { - doNotFinishUp = false; - break; - } - else if (Manipulate.isEndOfSyllableMark(ch)) - { - break; - } - else - { - fin++; - if (fin>=linea.length()) - break; + doNotFinishUp=true; + + // Make init skip all punctuation marks + while (true) + { + if (init>=linea.length()) + break outAHere; + ch = linea.charAt(init); + if (Manipulate.isPunctuationMark(ch)) + { + if (doNotFinishUp) + { + finishUp(); + doNotFinishUp=false; + } + wordList.addLast(new PunctuationMark(ch)); + } + else if (!Manipulate.isEndOfSyllableMark(ch)) + break; + + init++; } + + doNotFinishUp = true; + + /* move fin to the end of the next syllable. If finishing + up is necessary it is done after scanSyllable + */ + + fin = init+1; + while (fin < linea.length()) + { + ch = linea.charAt(fin); + if (Manipulate.isPunctuationMark(ch)) + { + doNotFinishUp = false; + break; + } + else if (Manipulate.isEndOfSyllableMark(ch)) + { + break; + } + else + { + fin++; + if (fin>=linea.length()) + break; + } + } + + sil = linea.substring(init, fin); + scanSyllable(sil); + + if (!doNotFinishUp) + { + finishUp(); + wordList.addLast(new PunctuationMark(ch)); + } + init = fin+1; } - - sil = linea.substring(init, fin); - scanSyllable(sil); - - if (!doNotFinishUp) - { - finishUp(); - wordList.addLast(new PunctuationMark(ch)); - } - init = fin+1; - } - } - - /** Looks for .dic file, and returns the dictionary descriptions. - Also updates the definitionTags in the Definitions class. - */ - public String[] getDictionaryDescriptions() - { - return FileSyllableListTree.getDictionaryDescriptions(archivo); } - public void destroy() - { - FileSyllableListTree.closeFiles(); - } - + /** Looks for .dic file, and returns the dictionary descriptions. + Also updates the definitionTags in the Definitions class. + */ + public String[] getDictionaryDescriptions() + { + return FileSyllableListTree.getDictionaryDescriptions(archivo); + } + + public void destroy() + { + FileSyllableListTree.closeFiles(); + } + } \ No newline at end of file diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java index a00fe21..f5f1574 100644 --- a/source/org/thdl/tib/scanner/Manipulate.java +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -17,6 +17,10 @@ Contributor(s): ______________________________________. */ package org.thdl.tib.scanner; +import org.thdl.tib.text.*; +import org.thdl.tib.text.reverter.*; + + /** Miscelaneous static methods for the manipulation of Tibetan text. @author Andrés Montano Pellegrini @@ -24,7 +28,6 @@ package org.thdl.tib.scanner; public class Manipulate { - private static String endOfParagraphMarks = "/;|!:^@#$%="; private static String bracketMarks = "<>(){}[]"; private static String endOfSyllableMarks = " _\t"; @@ -177,70 +180,7 @@ public class Manipulate { ch = Character.toLowerCase(ch); return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; - } - - public static String wylieToAcip(String palabra) - { - // DLC FIXME: for unknown things, return null. - if (palabra.equals("@##")) return "#"; - if (palabra.equals("@#")) return "*"; - if (palabra.equals("!")) return "`"; - if (palabra.equals("b+h")) return "BH"; - if (palabra.equals("d+h")) return "DH"; - if (palabra.equals("X")) return null; - if (palabra.equals("iA")) return null; - if (palabra.equals("ai")) return "EE"; - if (palabra.equals("au")) return "OO"; - if (palabra.equals("$")) return null; - if (palabra.startsWith("@") || palabra.startsWith("#")) - return null; // we can't convert this in isolation! We need context. - char []caract; - int i, j, len; - String nuevaPalabra; - - caract = palabra.toCharArray(); - len = palabra.length(); - for (j=0; j0) - { - i--; - break; - } - default:*/ - if (Character.isLowerCase(caract[i])) - caract[i] = Character.toUpperCase(caract[i]); - else if (Character.isUpperCase(caract[i])) - caract[i] = Character.toLowerCase(caract[i]); - /* break ciclo; - } - }*/ - } - nuevaPalabra = new String(caract); - // nuevaPalabra = palabra.toUpperCase(); - - // ahora hacer los cambios de Michael Roach - - nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); - nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); - nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); - nuevaPalabra = replace(nuevaPalabra, "a", "'A"); - nuevaPalabra = replace(nuevaPalabra, "i", "'I"); - nuevaPalabra = replace(nuevaPalabra, "u", "'U"); - nuevaPalabra = replace(nuevaPalabra, "-I", "i"); - nuevaPalabra = replace(nuevaPalabra, "/", ","); - nuevaPalabra = replace(nuevaPalabra, "_", " "); - nuevaPalabra = replace(nuevaPalabra, "|", ";"); - nuevaPalabra = fixWazur(nuevaPalabra); - return nuevaPalabra; - } + } /** If more than half of the first letters among the first are 10 characters are uppercase assume its acip */ @@ -263,125 +203,30 @@ public class Manipulate else return (letters / upperCase < 2); } - public static String acipToWylie(String linea) + public static boolean isTibetanUnicodeCharacter(char ch) { - char caract[], ch, chP, chN; - String nuevaLinea; - int i, len; - boolean open; - - caract = linea.toCharArray(); - len = linea.length(); - for (i=0; i tsh, tz -> ts, v -> w, - TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h, - aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, - ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, - a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, - /-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */ - - nuevaLinea = replace(nuevaLinea, "ts", "tq"); - nuevaLinea = replace(nuevaLinea, "tz", "ts"); - nuevaLinea = replace(nuevaLinea, "tq", "tsh"); - nuevaLinea = replace(nuevaLinea, "v", "w"); - nuevaLinea = replace(nuevaLinea, "TH", "Th"); - nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh"); - nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh"); - nuevaLinea = replace(nuevaLinea, "SH", "Sh"); - nuevaLinea = replace(nuevaLinea, ":", "H"); - nuevaLinea = replace(nuevaLinea, "NH", "NaH"); - nuevaLinea = replace(nuevaLinea, "dh", "d+h"); - nuevaLinea = replace(nuevaLinea, "gh", "g+h"); - nuevaLinea = replace(nuevaLinea, "bh", "b+h"); - nuevaLinea = replace(nuevaLinea, "dzh", "dz+h"); - nuevaLinea = replace(nuevaLinea, "aa", "a"); - nuevaLinea = replace(nuevaLinea, "ai", "i"); - nuevaLinea = replace(nuevaLinea, "aee", "ai"); - nuevaLinea = replace(nuevaLinea, "au", "u"); - nuevaLinea = replace(nuevaLinea, "aoo", "au"); - nuevaLinea = replace(nuevaLinea, "ae", "e"); - nuevaLinea = replace(nuevaLinea, "ao", "o"); - nuevaLinea = replace(nuevaLinea, "ee", "ai"); - nuevaLinea = replace(nuevaLinea, "oo", "au"); - nuevaLinea = replace(nuevaLinea, "\'I", "\'q"); - nuevaLinea = replace(nuevaLinea, "I", "-i"); - nuevaLinea = replace(nuevaLinea, "\'q", "-I"); - nuevaLinea = replace(nuevaLinea, "\\", "?"); - nuevaLinea = replace(nuevaLinea, "`", "!"); - nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); - nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); - nuevaLinea = replace(nuevaLinea, "na-y", "n+y"); - - len = nuevaLinea.length(); - for (i=0; i0 && i1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2))))) - { - nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2); - len-=2; - } - } - } - } - } - - open = false; - for (i=0; i=0xF00 && ch<=0xFFF; } + public static boolean guessIfUnicode(String line) + { + char ch; + int letters=0, unicode=0, i, n; + n = line.length(); + if (n>10) n = 10; + for (i=0; i tsh, tz -> ts, v -> w, + TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h, + aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, + ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, + a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, + /-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y + + nuevaLinea = replace(nuevaLinea, "ts", "tq"); + nuevaLinea = replace(nuevaLinea, "tz", "ts"); + nuevaLinea = replace(nuevaLinea, "tq", "tsh"); + nuevaLinea = replace(nuevaLinea, "v", "w"); + nuevaLinea = replace(nuevaLinea, "TH", "Th"); + nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh"); + nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh"); + nuevaLinea = replace(nuevaLinea, "SH", "Sh"); + nuevaLinea = replace(nuevaLinea, ":", "H"); + nuevaLinea = replace(nuevaLinea, "NH", "NaH"); + nuevaLinea = replace(nuevaLinea, "dh", "d+h"); + nuevaLinea = replace(nuevaLinea, "gh", "g+h"); + nuevaLinea = replace(nuevaLinea, "bh", "b+h"); + nuevaLinea = replace(nuevaLinea, "dzh", "dz+h"); + nuevaLinea = replace(nuevaLinea, "aa", "a"); + nuevaLinea = replace(nuevaLinea, "ai", "i"); + nuevaLinea = replace(nuevaLinea, "aee", "ai"); + nuevaLinea = replace(nuevaLinea, "au", "u"); + nuevaLinea = replace(nuevaLinea, "aoo", "au"); + nuevaLinea = replace(nuevaLinea, "ae", "e"); + nuevaLinea = replace(nuevaLinea, "ao", "o"); + nuevaLinea = replace(nuevaLinea, "ee", "ai"); + nuevaLinea = replace(nuevaLinea, "oo", "au"); + nuevaLinea = replace(nuevaLinea, "\'I", "\'q"); + nuevaLinea = replace(nuevaLinea, "I", "-i"); + nuevaLinea = replace(nuevaLinea, "\'q", "-I"); + nuevaLinea = replace(nuevaLinea, "\\", "?"); + nuevaLinea = replace(nuevaLinea, "`", "!"); + nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); + nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); + nuevaLinea = replace(nuevaLinea, "na-y", "n+y"); + + len = nuevaLinea.length(); + for (i=0; i0 && i1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2))))) + { + nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2); + len-=2; + } + } + } + } + } + + open = false; + for (i=0; i0) + { + i--; + break; + } + default: + if (Character.isLowerCase(caract[i])) + caract[i] = Character.toUpperCase(caract[i]); + else if (Character.isUpperCase(caract[i])) + caract[i] = Character.toLowerCase(caract[i]); + /* break ciclo; + } + } + } + nuevaPalabra = new String(caract); + // nuevaPalabra = palabra.toUpperCase(); + + // ahora hacer los cambios de Michael Roach + + nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); + nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); + nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); + nuevaPalabra = replace(nuevaPalabra, "a", "'A"); + nuevaPalabra = replace(nuevaPalabra, "i", "'I"); + nuevaPalabra = replace(nuevaPalabra, "u", "'U"); + nuevaPalabra = replace(nuevaPalabra, "-I", "i"); + nuevaPalabra = replace(nuevaPalabra, "/", ","); + nuevaPalabra = replace(nuevaPalabra, "_", " "); + nuevaPalabra = replace(nuevaPalabra, "|", ";"); + nuevaPalabra = fixWazur(nuevaPalabra); + return nuevaPalabra; */ + } + + public static String unicodeToWylie(String unicode) + { + String machineWylie; + TibetanDocument tibDoc = new TibetanDocument(); + StringBuffer errors = new StringBuffer(); + + machineWylie = Converter.convertToEwtsForComputers(unicode, errors); + try + { + TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false); + } + catch (InvalidTransliterationException e) + { + return null; + } + return tibDoc.getWylie(new boolean[] { false }); + } + + /** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */ + public static String NCR2UnicodeString(String str) + { + StringBuffer ostr = new StringBuffer(); + int i1=0; + int i2=0; + + while(i2"); out.println(""); - out.println(""); + out.println(""); if (useTHDLBanner) { - out.println(" Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool"); - out.println(" "); - out.println(" "); - out.println(" "); - } - else - out.println(" The Online Tibetan to English Translation/Dictionary Tool"); - - out.println(" "); - out.println(" "); - out.println(" "); + out.println(" Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool"); + out.println(" "); + out.println(" "); + out.println(" "); + } + else + out.println(" The Online Tibetan to English Translation/Dictionary Tool"); + + out.println(" "); + out.println(" "); + out.println(" "); + + answer = request.getParameter(scriptStr); + + /* script==null || makes default tibetan + script!=null && makes default roman + */ + wantsTibetan = (answer==null || answer.equals(tibetanStr)); + /*if (wantsTibetan) + { + out.println(""); + }*/ + out.println(""); + out.println(""); - answer = request.getParameter(scriptStr); - - /* script==null || makes default tibetan - script!=null && makes default roman - */ - wantsTibetan = (answer==null || answer.equals(tibetanStr)); - if (wantsTibetan) - { - out.println(""); - } - out.println(""); - out.println(""); - if (useTHDLBanner) { - out.println(""); - out.println("
"); - out.println("
"); - out.println("
"); - out.println("

"); - out.println(" "); - out.println(" "); - out.println(" "); - out.println("

"); - out.println("
"); - out.println("
"); - out.println("
"); - out.println(" Home > Reference > Translation Tool"); - out.println("
"); - out.println("
"); - out.println("
"); - } - - out.println("

The Online Tibetan to English Translation/Dictionary Tool

"); - - try - { - out.println(rb.getString(otherLinksProperty)); - } - catch (MissingResourceException e) - { - // do nothing - } - + out.println(""); + out.println("
"); + out.println("
"); + out.println("
"); + out.println("

"); + out.println(" "); + out.println(" "); + out.println(" "); + out.println("

"); + out.println("
"); + out.println("
"); + out.println("
"); + out.println(" Home > Reference > Translation Tool"); + out.println("
"); + out.println("
"); + out.println("
"); + } + + out.println("

The Online Tibetan to English Translation/Dictionary Tool

"); + + try + { + out.println(rb.getString(otherLinksProperty)); + } + catch (MissingResourceException e) + { + // do nothing + } + if (useTHDLBanner) { - out.println("
"); - } - else - { - out.println(""); - } - out.println(""); - out.println(" "); - out.println(" "); - out.println(" "); - out.println(" "); - out.println("
"); - out.println("

Display results in:

"); - out.println("

Tibetan script (using Tibetan Machine Web font)
"); - out.println(" Roman script

"); - + out.println(""); + } + else + { + out.println(""); + } + out.println(""); + out.println(" "); + out.println(" "); + out.println(" "); + out.println(" "); + out.println("
"); + out.println("

Display results in:

"); + out.println("

Tibetan script (using Tibetan Machine Uni font)
"); + out.println(" Roman script

"); + if (dictionaries!=null) { int i; ds = scanner.getDictionarySource(); ds.reset(); checkedDicts = new boolean[dictionaries.length]; -/* out.println(" "); - out.println("Search in dictionaries:");*/ + /* out.println(" "); + out.println("Search in dictionaries:");*/ out.println("

Search in dictionaries: "); allUnchecked=true; for (i=0; i"); out.println("

"); - out.println(""); - out.println(" "); - out.println(" "); + out.println(" "); + out.println(" "); + out.println("
"); + out.println(""); + out.println(" "); + out.println(" "); - out.println(" "); - out.println(" "); - out.println("
"); out.println("

Input text:

"); - out.println("
"); - out.println("

"); - out.println("
"); + out.println("
"); + out.println("

"); + out.println("
"); - out.println(""); - out.println("
"); + if (answer == null || answer != null && !answer.equals(clearStr)) + { + parrafo = request.getParameter("parrafo"); + if (parrafo!=null) out.print(parrafo); + } - if (parrafo != null) - { - sl.writeLog("Translation\tOnLineScannerFilter"); - if (ds!=null && !ds.isEmpty()) - desglosar(parrafo, out, wantsTibetan); - } - else sl.writeLog("Invocation\tOnLineScannerFilter"); + out.println(""); + out.println(""); + + if (parrafo != null) + { + sl.writeLog("Translation\tOnLineScannerFilter"); + if (ds!=null && !ds.isEmpty()) + desglosar(parrafo, out, wantsTibetan); + } + else sl.writeLog("Invocation\tOnLineScannerFilter"); out.println(TibetanScanner.copyrightHTML); if (useTHDLBanner) out.println("
"); - out.println(""); - out.println(""); - } + out.println(""); + out.println(""); + } - public void doPost(HttpServletRequest request, - HttpServletResponse response) - //throws IOException, ServletException - { - doGet(request, response); - } + public void doPost(HttpServletRequest request, + HttpServletResponse response) + //throws IOException, ServletException + { + doGet(request, response); + } synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan) { @@ -298,22 +300,25 @@ public class OnLineScannerFilter extends HttpServlet if (!in.equals("")) { - /* while (hayMasLineas) - { - fin = in.indexOf("\n",init); - if (fin<0) - { - linea = in.substring(init).trim(); - hayMasLineas=false; - } - else - linea = in.substring(init, fin).trim(); - - scanner.scanBody(linea); - - init = fin+1; - } */ + /* while (hayMasLineas) + { + fin = in.indexOf("\n",init); + if (fin<0) + { + linea = in.substring(init).trim(); + hayMasLineas=false; + } + else + linea = in.substring(init, fin).trim(); + + scanner.scanBody(linea); + + init = fin+1; + } */ scanner.clearTokens(); + in = Manipulate.NCR2UnicodeString(in); + if (Manipulate.guessIfUnicode(in)) in = Manipulate.unicodeToWylie(in); + else if (Manipulate.guessIfAcip(in)) in = Manipulate.acipToWylie(in); scanner.scanBody(in); scanner.finishUp(); printText(pw, tibetan); @@ -335,35 +340,35 @@ public class OnLineScannerFilter extends HttpServlet for (i=0; i < words.length; i++) { - if (words[i] instanceof Word) - { - word = new SwingWord((Word)words[i]); - // if (word.getDefs().getDictionarySource()!=null) - pw.print(word.getLink()); - // else pw.print(word.getWylie() + " "); - } - else - { - if (words[i] instanceof PunctuationMark) - { - pm = words[i].toString().charAt(0); - switch (pm) - { - case '\n': - pw.println("

"); - pw.print("

"); - break; - case '<': - pw.print("< "); - break; - case '>': - pw.print("> "); - break; - default: - pw.print(pm + " "); - } - } - } + if (words[i] instanceof Word) + { + word = new SwingWord((Word)words[i]); + // if (word.getDefs().getDictionarySource()!=null) + pw.print(word.getLink(tibetan)); + // else pw.print(word.getWylie() + " "); + } + else + { + if (words[i] instanceof PunctuationMark) + { + pm = words[i].toString().charAt(0); + switch (pm) + { + case '\n': + pw.println("

"); + pw.print("

"); + break; + case '<': + pw.print("< "); + break; + case '>': + pw.print("> "); + break; + default: + pw.print(pm + " "); + } + } + } } pw.println("

"); } @@ -376,17 +381,17 @@ public class OnLineScannerFilter extends HttpServlet String tag; DictionarySource ds; ByteDictionarySource sourceb=null; - + words = scanner.getWordArray(false); - + if (words == null) return; - + pw.println(""); - + for (j = 0; j < words.length; j++) { try { - + word = new SwingWord(words[j]); defs = word.getDefs(); ds = defs.getDictionarySource(); @@ -400,19 +405,20 @@ public class OnLineScannerFilter extends HttpServlet } else { sourceb = (ByteDictionarySource) ds; - k=0; - while (sourceb.isEmpty(k)) k++; - tag = sourceb.getTag(k); - k++; + k=0; + while (sourceb.isEmpty(k)) k++; + tag = sourceb.getTag(k); + k++; } } - pw.println(" "); + pw.print(" "); pw.println(" "); pw.println(" "); - + pw.println(" "); for (i = 1; i < defs.def.length; i++) { pw.println(" "); @@ -421,9 +427,9 @@ public class OnLineScannerFilter extends HttpServlet tag = ds.getTag(i); } else { - while (sourceb.isEmpty(k)) k++; - tag = sourceb.getTag(k); - k++; + while (sourceb.isEmpty(k)) k++; + tag = sourceb.getTag(k); + k++; } pw.println(" "); @@ -435,17 +441,17 @@ public class OnLineScannerFilter extends HttpServlet sl.writeLog("Crash\tOnLineScannerFilter\t" + word.getWylie()); sl.writeException(e); } - + } pw.println("
" + word.getBookmark(tibetan) - + "" + word.getBookmark(tibetan) + "" + tag + "" + defs.def[0] + "
" + tag + "
"); } public void destroy() { - super.destroy(); - sl.setUserIP(null); - sl.writeLog("Shutdown\tOnLineScannerFilter"); - scanner.destroy(); + super.destroy(); + sl.setUserIP(null); + sl.writeLog("Shutdown\tOnLineScannerFilter"); + scanner.destroy(); } } diff --git a/source/org/thdl/tib/scanner/StrictDuffPane.java b/source/org/thdl/tib/scanner/StrictDuffPane.java index 3ded0eb..747e469 100644 --- a/source/org/thdl/tib/scanner/StrictDuffPane.java +++ b/source/org/thdl/tib/scanner/StrictDuffPane.java @@ -1,20 +1,20 @@ /* -The contents of this file are subject to the AMP Open Community License -Version 1.0 (the "License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License on the AMP web site -(http://www.tibet.iteso.mx/Guatemala/). - -Software distributed under the License is distributed on an "AS IS" basis, -WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific terms governing rights and limitations under the -License. - -The Initial Developer of this software is Andres Montano Pellegrini. Portions -created by Andres Montano Pellegrini are Copyright 2001 Andres Montano -Pellegrini. All Rights Reserved. - -Contributor(s): ______________________________________. -*/ + The contents of this file are subject to the AMP Open Community License + Version 1.0 (the "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License on the AMP web site + (http://www.tibet.iteso.mx/Guatemala/). + + Software distributed under the License is distributed on an "AS IS" basis, + WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific terms governing rights and limitations under the + License. + + The Initial Developer of this software is Andres Montano Pellegrini. Portions + created by Andres Montano Pellegrini are Copyright 2001 Andres Montano + Pellegrini. All Rights Reserved. + + Contributor(s): ______________________________________. + */ package org.thdl.tib.scanner; @@ -29,127 +29,136 @@ import javax.swing.text.BadLocationException; import org.thdl.tib.input.DuffPane; import org.thdl.tib.text.TibetanDocument; +import org.thdl.tib.text.reverter.*; import org.thdl.util.RTFFixerInputStream; import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlOptions; /** Identical to DuffPane except that it only supports Tibetan script in - TibetanMachineWeb. No roman script can be inputted. If roman script is - pasted, it is assumed that it is either ACIP or wylie and is converted - accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted, - it is converted to TibetanMachineWeb. Any other font is assumed to be - Roman script. -*/ + TibetanMachineWeb. No roman script can be inputted. If roman script is + pasted, it is assumed that it is either ACIP or wylie and is converted + accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted, + it is converted to TibetanMachineWeb. Any other font is assumed to be + Roman script. + */ public class StrictDuffPane extends DuffPane { - public StrictDuffPane() - { - super(); - disableRoman(); - } - - /** Smart paste! Automatically recognizes what is being pasted and converts - respectively. Currently it supports pasting from TibetanMachineWeb, - TibetanMachine, wylie, and ACIP. - */ - public void paste(int offset) - { - // Respect setEditable(boolean): - if (!this.isEditable()) - return; - - try - { - Transferable contents = rtfBoard.getContents(this); - - if (contents.isDataFlavorSupported(rtfFlavor)){ - - InputStream in = (InputStream)contents.getTransferData(rtfFlavor); - int p1 = offset; - - //construct new document that contains only portion of text you want to paste - TibetanDocument sd = new TibetanDocument(); - - // I swear this happened once when I pasted in some - // random junk just after Jskad started up. - ThdlDebug.verify(null != in); - - boolean errorReading = false; - - try - { - if (!ThdlOptions.getBooleanOption("thdl.do.not.fix.rtf.hex.escapes")) - in = new RTFFixerInputStream(in); - rtfEd.read(in, sd, 0); - } catch (Exception e) { - - errorReading = true; - - /* If fonts weren't supported and we don't know what it is try to paste - ACIP or wylie. - */ - if (contents.isDataFlavorSupported(DataFlavor.stringFlavor)) - { - String data = (String)contents.getTransferData(DataFlavor.stringFlavor); - if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); - toTibetanMachineWeb(data, offset); - } - // JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop."); - } - - if (!errorReading) - { - /* If it is any font beside TibetanMachine and TibetanMachineWeb - assume it is wylie or Acip. - */ - if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine") - && contents.isDataFlavorSupported(DataFlavor.stringFlavor)) - { - String data = (String)contents.getTransferData(DataFlavor.stringFlavor); - if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); - toTibetanMachineWeb(data, offset); - } - else - { - // If it's font is TibetanMachine, convert to TibetanMachineWeb first - if (sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().equals("TibetanMachine")) - { - StringBuffer errors = new StringBuffer(); - long numAttemptedReplacements[] = new long[] { 0 }; - sd.convertToTMW(0, -1, errors, numAttemptedReplacements); - } - - for (int i=0; i" + result + " "; + return "" + result + " "; } } diff --git a/source/org/thdl/tib/scanner/TibetanScanner.java b/source/org/thdl/tib/scanner/TibetanScanner.java index d473ae8..d4d439b 100644 --- a/source/org/thdl/tib/scanner/TibetanScanner.java +++ b/source/org/thdl/tib/scanner/TibetanScanner.java @@ -27,7 +27,7 @@ import org.thdl.util.ThdlVersion; */ public abstract class TibetanScanner { - public static final String version = "The Tibetan to English Translation Tool, version 3.2.1 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; + public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved."; public static final String copyrightHTML="
" + version + "Copyright © 2000-2005 by Andrés Montano Pellegrini.
All rights reserved.
";