Updated the translation tool to accept Tibetan Unicode. For the application version, this means that smart paste is Unicode-aware and performs the appropriate conversion. In the servlet version, Tibetan Unicode can now be entered in the form, and Tibetan Machine Uni is used to display the results.

The Manipulate class now includes acipToWylie, wylieToAcip, and unicodeToWylie. They provide a simple interface to David Chandler's converters, which are used by the translation tool.
This commit is contained in:
amontano 2006-04-24 06:09:17 +00:00
parent 2a47ff68a4
commit 67bddb7a7e
6 changed files with 851 additions and 717 deletions

View file

@ -1,44 +1,44 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.Vector; import java.util.Vector;
/** Loads dictionary stored in tree format and searches for words recursively. /** Loads dictionary stored in tree format and searches for words recursively.
How the dictionary is loaded depends on which implementation of How the dictionary is loaded depends on which implementation of
{@link SyllableListTree} is invoked. {@link SyllableListTree} is invoked.
@author Andrés Montano Pellegrini @author Andrés Montano Pellegrini
@see SyllableListTree @see SyllableListTree
*/ */
public class LocalTibetanScanner extends TibetanScanner public class LocalTibetanScanner extends TibetanScanner
{ {
public static String archivo; public static String archivo;
private SyllableListTree raiz, silActual, lastCompSil, silAnterior; private SyllableListTree raiz, silActual, lastCompSil, silAnterior;
private String wordActual, lastCompWord; private String wordActual, lastCompWord;
private Vector floatingSil; private Vector floatingSil;
static static
{ {
archivo = null; archivo = null;
} }
public BitDictionarySource getDictionarySource() public BitDictionarySource getDictionarySource()
{ {
return raiz.getDictionarySourcesWanted(); return raiz.getDictionarySourcesWanted();
@ -46,12 +46,12 @@ public class LocalTibetanScanner extends TibetanScanner
public LocalTibetanScanner(String arch) throws Exception public LocalTibetanScanner(String arch) throws Exception
{ {
this (arch, true); this (arch, true);
} }
public LocalTibetanScanner(String arch, boolean backwardCompatible) throws Exception public LocalTibetanScanner(String arch, boolean backwardCompatible) throws Exception
{ {
super(); super();
archivo = arch; archivo = arch;
// raiz = new MemorySyllableListTree(archivo); // raiz = new MemorySyllableListTree(archivo);
// raiz = new FileSyllableListTree(archivo); // raiz = new FileSyllableListTree(archivo);
@ -59,13 +59,13 @@ public class LocalTibetanScanner extends TibetanScanner
floatingSil = new Vector(); floatingSil = new Vector();
resetAll(); resetAll();
} }
private void resetAll() private void resetAll()
{ {
silAnterior = silActual = lastCompSil = null; silAnterior = silActual = lastCompSil = null;
wordActual = lastCompWord = null; wordActual = lastCompWord = null;
} }
private void scanSyllable(String sil) private void scanSyllable(String sil)
{ {
SyllableListTree resultado=null; SyllableListTree resultado=null;
@ -73,13 +73,13 @@ public class LocalTibetanScanner extends TibetanScanner
Word w; Word w;
String silSinDec; String silSinDec;
boolean aadded; boolean aadded;
if (silActual==null) if (silActual==null)
silActual = raiz; silActual = raiz;
silAnterior = silActual; silAnterior = silActual;
silActual = silActual.lookUp(sil); silActual = silActual.lookUp(sil);
if (silActual != null) if (silActual != null)
{ {
if (silActual.hasDef()) if (silActual.hasDef())
@ -113,9 +113,9 @@ public class LocalTibetanScanner extends TibetanScanner
} }
else else
{ {
resultado = null; resultado = null;
if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1);
silSinDec = withOutDec(silSinDec); silSinDec = withOutDec(silSinDec);
} }
} }
if (resultado!=null) return; if (resultado!=null) return;
@ -134,9 +134,9 @@ public class LocalTibetanScanner extends TibetanScanner
{ {
resultado = silAnterior.lookUp(silSinDec); resultado = silAnterior.lookUp(silSinDec);
/* here we don't have to worry about being in the middle of a /* here we don't have to worry about being in the middle of a
word since the declension marks that it is the end of a word since the declension marks that it is the end of a
word. word.
*/ */
if (resultado == null || !resultado.hasDef()) if (resultado == null || !resultado.hasDef())
{ {
silSinDec += "\'"; silSinDec += "\'";
@ -153,26 +153,26 @@ public class LocalTibetanScanner extends TibetanScanner
} }
else else
{ {
resultado = null; resultado = null;
if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1); if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1);
silSinDec = withOutDec(silSinDec); silSinDec = withOutDec(silSinDec);
} }
} }
if (resultado!=null) return; if (resultado!=null) return;
if (lastCompSil!=null) if (lastCompSil!=null)
{ {
if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs()); if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs());
else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs()); else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs());
wordList.addLast(w); wordList.addLast(w);
this.resetAll(); this.resetAll();
enumeration = floatingSil.elements(); enumeration = floatingSil.elements();
floatingSil = new Vector(); floatingSil = new Vector();
while (enumeration.hasMoreElements()) while (enumeration.hasMoreElements())
scanSyllable((String)enumeration.nextElement()); scanSyllable((String)enumeration.nextElement());
scanSyllable(sil); scanSyllable(sil);
} }
else else
@ -193,32 +193,32 @@ public class LocalTibetanScanner extends TibetanScanner
} }
} }
} }
public void finishUp() public void finishUp()
{ {
Enumeration enumeration; Enumeration enumeration;
Word w; Word w;
while (lastCompSil!=null) while (lastCompSil!=null)
{ {
if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs()); if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs());
else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs()); else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs());
wordList.addLast(w); wordList.addLast(w);
this.resetAll(); this.resetAll();
enumeration = floatingSil.elements(); enumeration = floatingSil.elements();
floatingSil = new Vector(); floatingSil = new Vector();
while (enumeration.hasMoreElements()) while (enumeration.hasMoreElements())
scanSyllable((String)enumeration.nextElement()); scanSyllable((String)enumeration.nextElement());
} }
if (silActual!=null) if (silActual!=null)
{ {
wordList.addLast(new Word(wordActual, "[incomplete word]")); wordList.addLast(new Word(wordActual, "[incomplete word]"));
this.resetAll(); this.resetAll();
} }
} }
private static String concatWithSpace(String s1, String s2) private static String concatWithSpace(String s1, String s2)
{ {
if (s1==null || s1.equals("")) if (s1==null || s1.equals(""))
@ -226,14 +226,14 @@ public class LocalTibetanScanner extends TibetanScanner
else else
return s1 + ' ' + s2; return s1 + ' ' + s2;
} }
private static String withOutDec(String sil) private static String withOutDec(String sil)
{ {
boolean isDeclined =false; boolean isDeclined =false;
int len = sil.length(), apos; int len = sil.length(), apos;
if (len<3) return null; if (len<3) return null;
char lastCar = Character.toLowerCase(sil.charAt(len-1)); char lastCar = Character.toLowerCase(sil.charAt(len-1));
if ((lastCar == 's' || lastCar == 'r') && Manipulate.isVowel(sil.charAt(len-2))) if ((lastCar == 's' || lastCar == 'r') && Manipulate.isVowel(sil.charAt(len-2)))
{ {
@ -242,19 +242,19 @@ public class LocalTibetanScanner extends TibetanScanner
} }
else else
{ {
apos = sil.lastIndexOf('\''); apos = sil.lastIndexOf('\'');
if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u') if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u')
{ {
isDeclined=true; isDeclined=true;
sil = sil.substring(0, apos); sil = sil.substring(0, apos);
} }
/* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'') /* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'')
{ {
isDeclined=true; isDeclined=true;
sil = sil.substring(0, len-2); sil = sil.substring(0, len-2);
}*/ }*/
} }
if (!isDeclined) return null; if (!isDeclined) return null;
return sil; return sil;
} }
@ -262,127 +262,127 @@ public class LocalTibetanScanner extends TibetanScanner
public void scanBody(String in) public void scanBody(String in)
{ {
boolean hayMasLineas=true; boolean hayMasLineas=true;
if (in.equals("")) finishUp(); if (in.equals("")) finishUp();
else else
{ {
int init = 0, fin; int init = 0, fin;
String linea; String linea;
while (hayMasLineas) while (hayMasLineas)
{ {
fin = in.indexOf("\n",init); fin = in.indexOf("\n",init);
if (fin<0) if (fin<0)
{ {
linea = in.substring(init).trim(); linea = in.substring(init).trim();
hayMasLineas=false; hayMasLineas=false;
} }
else else
linea = in.substring(init, fin).trim(); linea = in.substring(init, fin).trim();
if (linea.equals("")) if (linea.equals(""))
{ {
finishUp(); finishUp();
wordList.addLast(new PunctuationMark('\n')); wordList.addLast(new PunctuationMark('\n'));
} }
else else
scanLine(linea); scanLine(linea);
init = fin+1; init = fin+1;
} }
} }
} }
public void scanLine(String linea) public void scanLine(String linea)
{ {
int init = 0, fin; int init = 0, fin;
char ch; char ch;
String sil; String sil;
boolean doNotFinishUp; boolean doNotFinishUp;
if (linea.equals("")) if (linea.equals(""))
{ {
finishUp(); finishUp();
wordList.addLast(new PunctuationMark('\n')); wordList.addLast(new PunctuationMark('\n'));
return; return;
} }
outAHere: outAHere:
while(true) while(true)
{
doNotFinishUp=true;
// Make init skip all punctuation marks
while (true)
{ {
if (init>=linea.length()) doNotFinishUp=true;
break outAHere;
ch = linea.charAt(init); // Make init skip all punctuation marks
if (Manipulate.isPunctuationMark(ch)) while (true)
{ {
if (doNotFinishUp) if (init>=linea.length())
{ break outAHere;
finishUp(); ch = linea.charAt(init);
doNotFinishUp=false; if (Manipulate.isPunctuationMark(ch))
} {
wordList.addLast(new PunctuationMark(ch)); if (doNotFinishUp)
} {
else if (!Manipulate.isEndOfSyllableMark(ch)) finishUp();
break; doNotFinishUp=false;
}
init++; wordList.addLast(new PunctuationMark(ch));
} }
else if (!Manipulate.isEndOfSyllableMark(ch))
doNotFinishUp = true; break;
/* move fin to the end of the next syllable. If finishing init++;
up is necessary it is done after scanSyllable
*/
fin = init+1;
while (fin < linea.length())
{
ch = linea.charAt(fin);
if (Manipulate.isPunctuationMark(ch))
{
doNotFinishUp = false;
break;
}
else if (Manipulate.isEndOfSyllableMark(ch))
{
break;
}
else
{
fin++;
if (fin>=linea.length())
break;
} }
doNotFinishUp = true;
/* move fin to the end of the next syllable. If finishing
up is necessary it is done after scanSyllable
*/
fin = init+1;
while (fin < linea.length())
{
ch = linea.charAt(fin);
if (Manipulate.isPunctuationMark(ch))
{
doNotFinishUp = false;
break;
}
else if (Manipulate.isEndOfSyllableMark(ch))
{
break;
}
else
{
fin++;
if (fin>=linea.length())
break;
}
}
sil = linea.substring(init, fin);
scanSyllable(sil);
if (!doNotFinishUp)
{
finishUp();
wordList.addLast(new PunctuationMark(ch));
}
init = fin+1;
} }
sil = linea.substring(init, fin);
scanSyllable(sil);
if (!doNotFinishUp)
{
finishUp();
wordList.addLast(new PunctuationMark(ch));
}
init = fin+1;
}
}
/** Looks for .dic file, and returns the dictionary descriptions.
Also updates the definitionTags in the Definitions class.
*/
public String[] getDictionaryDescriptions()
{
return FileSyllableListTree.getDictionaryDescriptions(archivo);
} }
public void destroy() /** Looks for .dic file, and returns the dictionary descriptions.
{ Also updates the definitionTags in the Definitions class.
FileSyllableListTree.closeFiles(); */
} public String[] getDictionaryDescriptions()
{
return FileSyllableListTree.getDictionaryDescriptions(archivo);
}
public void destroy()
{
FileSyllableListTree.closeFiles();
}
} }

View file

@ -17,6 +17,10 @@ Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import org.thdl.tib.text.*;
import org.thdl.tib.text.reverter.*;
/** Miscellaneous static methods for the manipulation of Tibetan text. /** Miscellaneous static methods for the manipulation of Tibetan text.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
@ -24,7 +28,6 @@ package org.thdl.tib.scanner;
public class Manipulate public class Manipulate
{ {
private static String endOfParagraphMarks = "/;|!:^@#$%="; private static String endOfParagraphMarks = "/;|!:^@#$%=";
private static String bracketMarks = "<>(){}[]"; private static String bracketMarks = "<>(){}[]";
private static String endOfSyllableMarks = " _\t"; private static String endOfSyllableMarks = " _\t";
@ -177,70 +180,7 @@ public class Manipulate
{ {
ch = Character.toLowerCase(ch); ch = Character.toLowerCase(ch);
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
} }
public static String wylieToAcip(String palabra)
{
// DLC FIXME: for unknown things, return null.
if (palabra.equals("@##")) return "#";
if (palabra.equals("@#")) return "*";
if (palabra.equals("!")) return "`";
if (palabra.equals("b+h")) return "BH";
if (palabra.equals("d+h")) return "DH";
if (palabra.equals("X")) return null;
if (palabra.equals("iA")) return null;
if (palabra.equals("ai")) return "EE";
if (palabra.equals("au")) return "OO";
if (palabra.equals("$")) return null;
if (palabra.startsWith("@") || palabra.startsWith("#"))
return null; // we can't convert this in isolation! We need context.
char []caract;
int i, j, len;
String nuevaPalabra;
caract = palabra.toCharArray();
len = palabra.length();
for (j=0; j<len; j++)
{
i = j;
/*ciclo:
while(true) // para manejar excepciones; que honda!
{
switch(caract[i])
{
case 'A':
if (i>0)
{
i--;
break;
}
default:*/
if (Character.isLowerCase(caract[i]))
caract[i] = Character.toUpperCase(caract[i]);
else if (Character.isUpperCase(caract[i]))
caract[i] = Character.toLowerCase(caract[i]);
/* break ciclo;
}
}*/
}
nuevaPalabra = new String(caract);
// nuevaPalabra = palabra.toUpperCase();
// ahora hacer los cambios de Michael Roach
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
nuevaPalabra = replace(nuevaPalabra, "/", ",");
nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = replace(nuevaPalabra, "|", ";");
nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra;
}
/** If more than half of the letters among the first 10 characters /** If more than half of the letters among the first 10 characters
are uppercase, assume it is ACIP. */ are uppercase, assume it is ACIP. */
@ -263,125 +203,30 @@ public class Manipulate
else return (letters / upperCase < 2); else return (letters / upperCase < 2);
} }
public static String acipToWylie(String linea) public static boolean isTibetanUnicodeCharacter(char ch)
{ {
char caract[], ch, chP, chN; return ch>=0xF00 && ch<=0xFFF;
String nuevaLinea;
int i, len;
boolean open;
caract = linea.toCharArray();
len = linea.length();
for (i=0; i<len; i++)
{
if (Character.isLowerCase(caract[i]))
caract[i] = Character.toUpperCase(caract[i]);
else if (Character.isUpperCase(caract[i]))
caract[i] = Character.toLowerCase(caract[i]);
}
nuevaLinea = new String(caract);
/* ahora hacer los cambios de Michael Roach ts -> tsh, tz -> ts, v -> w,
TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h,
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */
nuevaLinea = replace(nuevaLinea, "ts", "tq");
nuevaLinea = replace(nuevaLinea, "tz", "ts");
nuevaLinea = replace(nuevaLinea, "tq", "tsh");
nuevaLinea = replace(nuevaLinea, "v", "w");
nuevaLinea = replace(nuevaLinea, "TH", "Th");
nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
nuevaLinea = replace(nuevaLinea, ":", "H");
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
nuevaLinea = replace(nuevaLinea, "dzh", "dz+h");
nuevaLinea = replace(nuevaLinea, "aa", "a");
nuevaLinea = replace(nuevaLinea, "ai", "i");
nuevaLinea = replace(nuevaLinea, "aee", "ai");
nuevaLinea = replace(nuevaLinea, "au", "u");
nuevaLinea = replace(nuevaLinea, "aoo", "au");
nuevaLinea = replace(nuevaLinea, "ae", "e");
nuevaLinea = replace(nuevaLinea, "ao", "o");
nuevaLinea = replace(nuevaLinea, "ee", "ai");
nuevaLinea = replace(nuevaLinea, "oo", "au");
nuevaLinea = replace(nuevaLinea, "\'I", "\'q");
nuevaLinea = replace(nuevaLinea, "I", "-i");
nuevaLinea = replace(nuevaLinea, "\'q", "-I");
nuevaLinea = replace(nuevaLinea, "\\", "?");
nuevaLinea = replace(nuevaLinea, "`", "!");
nuevaLinea = replace(nuevaLinea, "ga-y", "g.y");
nuevaLinea = replace(nuevaLinea, "g-y", "g.y");
nuevaLinea = replace(nuevaLinea, "na-y", "n+y");
len = nuevaLinea.length();
for (i=0; i<len; i++)
{
ch = nuevaLinea.charAt(i);
switch(ch)
{
case '#':
nuevaLinea = nuevaLinea.substring(0,i) + "@##" + nuevaLinea.substring(i+1);
i+=3;
len+=2;
break;
case '*':
nuevaLinea = nuevaLinea.substring(0,i) + "@#" + nuevaLinea.substring(i+1);
i+=2;
len++;
break;
case '\'':
if (i>0 && i<len-1)
{
chP = nuevaLinea.charAt(i-1);
chN = nuevaLinea.charAt(i+1);
if (isVowel(chN))
{
if (Character.isLetter(chP) && !isVowel(chP))
{
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len--;
}
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
{
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len-=2;
}
}
}
}
}
open = false;
for (i=0; i<len; i++)
{
ch = nuevaLinea.charAt(i);
if (ch=='/')
{
if (open)
{
nuevaLinea = nuevaLinea.substring(0, i) + ")" + nuevaLinea.substring(i+1);
open = false;
}
else
{
nuevaLinea = nuevaLinea.substring(0, i) + "(" + nuevaLinea.substring(i+1);
open = true;
}
}
}
nuevaLinea = replace(nuevaLinea, ",", "/");
return nuevaLinea;
} }
/** Guesses whether a line is written in Tibetan Unicode by sampling
	at most its first 10 characters: the line is taken to be Unicode
	when more than half of the letters in that sample are Tibetan
	Unicode characters (as decided by isTibetanUnicodeCharacter).
	@param line text to examine
	@return true if Tibetan letters are the majority of the sampled
	letters; false otherwise, including when the sample holds no
	letters at all
*/
public static boolean guessIfUnicode(String line)
{
	final int sampleSize = Math.min(line.length(), 10);
	int letterCount = 0;
	int tibetanCount = 0;

	for (int pos = 0; pos < sampleSize; pos++)
	{
		final char current = line.charAt(pos);
		if (!Character.isLetter(current)) continue;
		letterCount++;
		if (isTibetanUnicodeCharacter(current)) tibetanCount++;
	}

	// Tibetan characters are only counted among letters, so a zero
	// Tibetan count also covers the "no letters" case and guards the
	// division below.
	if (tibetanCount == 0) return false;

	// Integer division: the quotient is below 2 exactly when
	// letterCount < 2 * tibetanCount.
	return letterCount / tibetanCount < 2;
}
public static String fixWazur(String linea) public static String fixWazur(String linea)
{ {
int i; int i;
@ -529,5 +374,275 @@ public class Manipulate
System.out.println(palabra + '\t' + definicion); System.out.println(palabra + '\t' + definicion);
} }
if (psPalabras!=null) psPalabras.flush(); if (psPalabras!=null) psPalabras.flush();
}*/ }*/
/** Converts ACIP transliteration to Wylie by loading the text into a
	scratch TibetanDocument and reading the document back as Wylie.
	The hand-written conversion that used to live here has been
	replaced by this delegation; the old code remains available in
	version control.
	@param acip text in ACIP transliteration
	@return the Wylie reported by the document, or null when the
	converter throws InvalidTransliterationException
*/
public static String acipToWylie(String acip)
{
	final TibetanDocument scratchDoc = new TibetanDocument();
	// getWylie takes a one-element flag array; whatever it stores
	// there is not inspected by this method.
	final boolean[] ignoredFlag = { false };

	try
	{
		// NOTE(review): the leading "false" presumably selects ACIP
		// (rather than EWTS) parsing -- confirm against TibTextUtils.
		TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, scratchDoc, 0, false);
	}
	catch (InvalidTransliterationException invalid)
	{
		return null;
	}

	return scratchDoc.getWylie(ignoredFlag);
}
/** Converts Wylie transliteration to ACIP by loading the text into a
	scratch TibetanDocument and reading the document back as ACIP.
	The hand-written conversion that used to live here has been
	replaced by this delegation; the old code remains available in
	version control.
	@param wylie text in (extended) Wylie transliteration
	@return the ACIP reported by the document, or null when the
	converter throws InvalidTransliterationException
*/
public static String wylieToAcip(String wylie)
{
	TibetanDocument tibDoc = new TibetanDocument();
	try
	{
		// The input here is Wylie, so the first argument must match the
		// value unicodeToWylie uses for its EWTS input (true) and not
		// the value acipToWylie uses for ACIP input (false). Passing
		// false made the converter parse Wylie as if it were ACIP.
		// NOTE(review): inferred from the sibling calls in this class;
		// confirm against the TibTextUtils signature.
		TibTextUtils.insertTibetanMachineWebForTranslit(true, wylie, tibDoc, 0, false);
	}
	catch (InvalidTransliterationException e)
	{
		return null;
	}
	// getACIP takes a one-element flag array; whatever it stores there
	// is not inspected by this method.
	return tibDoc.getACIP(new boolean[] { false });
}
/** Converts Tibetan Unicode text to Wylie in two steps: the text is
	first turned into machine-oriented EWTS by
	Converter.convertToEwtsForComputers, and that intermediate form is
	then loaded into a scratch TibetanDocument which is read back as
	Wylie.
	@param unicode Tibetan Unicode text
	@return the Wylie reported by the document, or null when the
	intermediate transliteration is rejected with
	InvalidTransliterationException
*/
public static String unicodeToWylie(String unicode)
{
	final TibetanDocument scratchDoc = new TibetanDocument();
	// Filled in by the converter; the collected messages are not
	// examined or reported by this method.
	final StringBuffer conversionErrors = new StringBuffer();
	final String ewtsForComputers = Converter.convertToEwtsForComputers(unicode, conversionErrors);

	try
	{
		TibTextUtils.insertTibetanMachineWebForTranslit(true, ewtsForComputers, scratchDoc, 0, false);
	}
	catch (InvalidTransliterationException invalid)
	{
		return null;
	}

	// getWylie takes a one-element flag array; its content after the
	// call is discarded.
	final boolean[] ignoredFlag = { false };
	return scratchDoc.getWylie(ignoredFlag);
}
/** Replaces HTML numeric character references (<code>&amp;#NNN;</code>
	decimal, <code>&amp;#xHHH;</code> or <code>&amp;#XHHH;</code>
	hexadecimal) with the characters they denote. Text outside
	references is copied unchanged, as is a trailing
	<code>&amp;#</code> that has no closing semicolon.
	A reference whose payload is empty, is not a number, or is not a
	valid Unicode code point is replaced by <code>'?'</code>.
	Code points above U+FFFF are emitted as a surrogate pair.
	Adapted from
	http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html
	@param str text possibly containing numeric character references
	@return the text with every complete reference decoded
*/
public static String NCR2UnicodeString(String str)
{
	StringBuffer decoded = new StringBuffer(str.length());
	int pos = 0;

	while (pos < str.length())
	{
		int start = str.indexOf("&#", pos);
		if (start == -1)
		{
			// no more references: copy the tail and stop
			decoded.append(str.substring(pos));
			break;
		}
		decoded.append(str.substring(pos, start));

		int end = str.indexOf(";", start);
		if (end == -1)
		{
			// unterminated reference: keep it verbatim
			decoded.append(str.substring(start));
			break;
		}

		// Trim once and use the trimmed token for both the radix check
		// and the parse, so the two always agree.
		String token = str.substring(start + 2, end).trim();
		int radix = 10;
		// The length check avoids an index error on an empty "&#;".
		if (token.length() > 0 && (token.charAt(0) == 'x' || token.charAt(0) == 'X'))
		{
			radix = 16;
			token = token.substring(1);
		}

		int codePoint = -1; // stays invalid if parsing fails
		try
		{
			codePoint = Integer.parseInt(token, radix);
		}
		catch (NumberFormatException malformed)
		{
			// handled below: an invalid code point becomes '?'
		}

		if (Character.isValidCodePoint(codePoint))
			decoded.appendCodePoint(codePoint);
		else
			decoded.append('?');

		pos = end + 1;
	}
	return decoded.toString();
}
/** Encodes every character of a string as a decimal HTML numeric
	character reference (<code>&amp;#NNN;</code>), ASCII included.
	The string is walked by Unicode code point, so a character outside
	the Basic Multilingual Plane yields a single reference to its code
	point instead of two references to its surrogate halves.
	@param str text to encode
	@return the concatenated references; empty for an empty string
*/
public static String UnicodeString2NCR(String str)
{
	StringBuffer ncr = new StringBuffer(str.length() * 8);
	int i = 0;
	while (i < str.length())
	{
		int codePoint = str.codePointAt(i);
		ncr.append("&#").append(codePoint).append(';');
		// advance by 2 over a surrogate pair, by 1 otherwise
		i += Character.charCount(codePoint);
	}
	return ncr.toString();
}
} }

View file

@ -1,20 +1,20 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
@ -29,24 +29,24 @@ import javax.servlet.http.HttpServletResponse;
import org.thdl.util.ThdlOptions; import org.thdl.util.ThdlOptions;
/** Interface to provide access to an on-line dictionary through a form in html; /** Interface to provide access to an on-line dictionary through a form in html;
Inputs Tibetan text (Roman script only) and displays the Inputs Tibetan text (Roman script only) and displays the
words (Roman or Tibetan script) with their definitions. words (Roman or Tibetan script) with their definitions.
Runs on the server and is called upon through an HTTP request directly Runs on the server and is called upon through an HTTP request directly
by the browser. Requires no additional software installed on the client. by the browser. Requires no additional software installed on the client.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
*/ */
public class OnLineScannerFilter extends HttpServlet public class OnLineScannerFilter extends HttpServlet
{ {
private final static String propertyFile = "dictionary"; private final static String propertyFile = "dictionary";
private final static String dictNameProperty = "onlinescannerfilter.dict-file-name"; private final static String dictNameProperty = "onlinescannerfilter.dict-file-name";
private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff"; private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff";
private final static String clearStr = "Clear"; private final static String clearStr = "Clear";
private final static String buttonStr = "button"; private final static String buttonStr = "button";
private final static String scriptStr = "script"; private final static String scriptStr = "script";
private final static String tibetanStr = "tibetan"; private final static String tibetanStr = "tibetan";
ResourceBundle rb; ResourceBundle rb;
private TibetanScanner scanner; private TibetanScanner scanner;
private String dictionaries[]; private String dictionaries[];
private ScannerLogger sl; private ScannerLogger sl;
@ -55,150 +55,150 @@ public class OnLineScannerFilter extends HttpServlet
{ {
rb = ResourceBundle.getBundle(propertyFile); rb = ResourceBundle.getBundle(propertyFile);
sl = new ScannerLogger(); sl = new ScannerLogger();
try try
{ {
scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false); scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false);
} }
catch (Exception e) catch (Exception e)
{ {
sl.writeLog("Crash\tOnLineScannerFilter"); sl.writeLog("Crash\tOnLineScannerFilter");
sl.writeException(e); sl.writeException(e);
} }
dictionaries = scanner.getDictionaryDescriptions(); dictionaries = scanner.getDictionaryDescriptions();
sl.writeLog("Creation\tOnLineScannerFilter"); sl.writeLog("Creation\tOnLineScannerFilter");
} }
synchronized public void doGet(HttpServletRequest request, synchronized public void doGet(HttpServletRequest request,
HttpServletResponse response) //throws IOException, ServletException HttpServletResponse response) //throws IOException, ServletException
{ {
String answer, parrafo = null, checkboxName; String answer, parrafo = null, checkboxName;
// if this line is included in the constructor, it works on the orion server but not on wyllie! // if this line is included in the constructor, it works on the orion server but not on wyllie!
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true); ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true); ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
response.setContentType("text/html"); response.setContentType("text/html");
PrintWriter out; PrintWriter out;
sl.setUserIP(request.getRemoteAddr()); sl.setUserIP(request.getRemoteAddr());
try try
{ {
out = response.getWriter(); out = response.getWriter();
} }
catch (Exception e) catch (Exception e)
{ {
sl.writeLog("Crash\tOnLineScannerFilter"); sl.writeLog("Crash\tOnLineScannerFilter");
sl.writeException(e); sl.writeException(e);
return; return;
} }
BitDictionarySource ds=null; BitDictionarySource ds=null;
boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null); boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null);
// int percent=100; // int percent=100;
out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">"); out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">"); out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
out.println("<head>"); out.println("<head>");
if (useTHDLBanner) if (useTHDLBanner)
{ {
out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>"); out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>");
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"); out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
out.println(" <script type=\"text/javascript\" src=\"/thdl/scripts/thdl_scripts.js\"></script>"); out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"/thdl/style/thdl-styles.css\"/>"); out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
} }
else else
out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>"); out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>");
out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">"); out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">");
out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">"); out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">");
out.println(" <meta name=\"MSSmartTagsPreventParsing\" content=\"TRUE\">"); out.println(" <meta name=\"MSSmartTagsPreventParsing\" content=\"TRUE\">");
answer = request.getParameter(scriptStr);
/* script==null || makes default tibetan
script!=null && makes default roman
*/
wantsTibetan = (answer==null || answer.equals(tibetanStr));
/*if (wantsTibetan)
{
out.println("<style>.tmw {font: 28pt TibetanMachineWeb}");
out.println(".tmw1 {font: 28pt TibetanMachineWeb1}");
out.println(".tmw2 {font: 28pt TibetanMachineWeb2}");
out.println(".tmw3 {font: 28pt TibetanMachineWeb3}");
out.println(".tmw4 {font: 28pt TibetanMachineWeb4}");
out.println(".tmw5 {font: 28pt TibetanMachineWeb5}");
out.println(".tmw6 {font: 28pt TibetanMachineWeb6}");
out.println(".tmw7 {font: 28pt TibetanMachineWeb7}");
out.println(".tmw8 {font: 28pt TibetanMachineWeb8}");
out.println(".tmw9 {font: 28pt TibetanMachineWeb9}");
out.println("</style>");
}*/
out.println("</head>");
out.println("<body>");
answer = request.getParameter(scriptStr);
/* script==null || makes default tibetan
script!=null && makes default roman
*/
wantsTibetan = (answer==null || answer.equals(tibetanStr));
if (wantsTibetan)
{
out.println("<style>.tmw {font: 28pt TibetanMachineWeb}");
out.println(".tmw1 {font: 28pt TibetanMachineWeb1}");
out.println(".tmw2 {font: 28pt TibetanMachineWeb2}");
out.println(".tmw3 {font: 28pt TibetanMachineWeb3}");
out.println(".tmw4 {font: 28pt TibetanMachineWeb4}");
out.println(".tmw5 {font: 28pt TibetanMachineWeb5}");
out.println(".tmw6 {font: 28pt TibetanMachineWeb6}");
out.println(".tmw7 {font: 28pt TibetanMachineWeb7}");
out.println(".tmw8 {font: 28pt TibetanMachineWeb8}");
out.println(".tmw9 {font: 28pt TibetanMachineWeb9}");
out.println("</style>");
}
out.println("</head>");
out.println("<body>");
if (useTHDLBanner) if (useTHDLBanner)
{ {
out.println("<script type=\"text/javascript\" src=\"/thdl/scripts/banner.js\"></script>"); out.println("<script type=\"text/javascript\" src=\"/thdl/scripts/banner.js\"></script>");
out.println("<div id=\"sub_banner\">"); out.println("<div id=\"sub_banner\">");
out.println("<div id=\"search\">"); out.println("<div id=\"search\">");
out.println(" <form method=\"get\" action=\"http://www.google.com/u/thdl\">"); out.println(" <form method=\"get\" action=\"http://www.google.com/u/thdl\">");
out.println(" <p>"); out.println(" <p>");
out.println(" <input type=\"text\" name=\"q\" id=\"q\" size=\"15\" maxlength=\"255\" value=\"\" />"); out.println(" <input type=\"text\" name=\"q\" id=\"q\" size=\"15\" maxlength=\"255\" value=\"\" />");
out.println(" <input type=\"submit\" name=\"sa\" id=\"sa\" value=\"Search\"/>"); out.println(" <input type=\"submit\" name=\"sa\" id=\"sa\" value=\"Search\"/>");
out.println(" <input type=\"hidden\" name=\"hq\" id=\"hq\" value=\"inurl:orion.lib.virginia.edu\"/>"); out.println(" <input type=\"hidden\" name=\"hq\" id=\"hq\" value=\"inurl:orion.lib.virginia.edu\"/>");
out.println(" </p>"); out.println(" </p>");
out.println(" </form>"); out.println(" </form>");
out.println(" </div>"); out.println(" </div>");
out.println(" <div id=\"breadcrumbs\">"); out.println(" <div id=\"breadcrumbs\">");
out.println(" <a href=\"/thdl/index.html\">Home</a> &gt; <a href=\"/thdl/reference/index.html\">Reference</a> &gt; Translation Tool"); out.println(" <a href=\"/thdl/index.html\">Home</a> &gt; <a href=\"/thdl/reference/index.html\">Reference</a> &gt; Translation Tool");
out.println(" </div>"); out.println(" </div>");
out.println("</div><!--END sub_banner-->"); out.println("</div><!--END sub_banner-->");
out.println("<div id=\"main\">"); out.println("<div id=\"main\">");
} }
out.println("<h3 align=\"center\">The Online Tibetan to English Translation/Dictionary Tool</h3>"); out.println("<h3 align=\"center\">The Online Tibetan to English Translation/Dictionary Tool</h3>");
try try
{ {
out.println(rb.getString(otherLinksProperty)); out.println(rb.getString(otherLinksProperty));
} }
catch (MissingResourceException e) catch (MissingResourceException e)
{ {
// do nothing // do nothing
} }
if (useTHDLBanner) if (useTHDLBanner)
{ {
out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter?thdlBanner=on\" method=POST>"); out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter?thdlBanner=on\" method=POST>");
} }
else else
{ {
out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter\" method=POST>"); out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter\" method=POST>");
} }
out.println("<table border=\"0\" width=\"100%\">"); out.println("<table border=\"0\" width=\"100%\">");
out.println(" <tr>"); out.println(" <tr>");
out.println(" <td width=\"25%\">"); out.println(" <td width=\"25%\">");
out.println(" <p>Display results in:</td>"); out.println(" <p>Display results in:</td>");
out.println(" <td width=\"75%\">"); out.println(" <td width=\"75%\">");
out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" "); out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" ");
if (wantsTibetan) out.println("checked "); if (wantsTibetan) out.println("checked ");
out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://orion.lib.virginia.edu/thdl/tools/tmw.html\" target=\"_blank\">Tibetan Machine Web font</a>)<br/>"); out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://www.thdl.org/xml/show.php?xml=/tools/tibfonts.xml&l=uva10928423419921\" target=\"_blank\">Tibetan Machine Uni font</a>)<br/>");
out.println(" <input type=\"radio\" value=\"roman\" "); out.println(" <input type=\"radio\" value=\"roman\" ");
if (!wantsTibetan) out.println("checked "); if (!wantsTibetan) out.println("checked ");
out.println("name=\"" + scriptStr + "\">Roman script</td>"); out.println("name=\"" + scriptStr + "\">Roman script</td>");
out.println(" </tr>"); out.println(" </tr>");
out.println("</table>"); out.println("</table>");
if (dictionaries!=null) if (dictionaries!=null)
{ {
int i; int i;
ds = scanner.getDictionarySource(); ds = scanner.getDictionarySource();
ds.reset(); ds.reset();
checkedDicts = new boolean[dictionaries.length]; checkedDicts = new boolean[dictionaries.length];
/* out.println(" <tr>"); /* out.println(" <tr>");
out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/ out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/
out.println("<p>Search in dictionaries: "); out.println("<p>Search in dictionaries: ");
allUnchecked=true; allUnchecked=true;
for (i=0; i<dictionaries.length; i++) for (i=0; i<dictionaries.length; i++)
@ -244,50 +244,52 @@ public class OnLineScannerFilter extends HttpServlet
else ds = BitDictionarySource.getAllDictionaries(); else ds = BitDictionarySource.getAllDictionaries();
// out.println("</table>"); // out.println("</table>");
out.println("</p>"); out.println("</p>");
out.println("<table border=\"0\" width=\"100%\">"); out.println("<table border=\"0\" width=\"100%\">");
out.println(" <tr>"); out.println(" <tr>");
out.println(" <td width=\"35%\">"); out.println(" <td width=\"35%\">");
out.println(" <p><strong>Input text:</strong></p>"); out.println(" <p><strong>Input text:</strong></p>");
out.println(" </td>"); out.println(" </td>");
out.println(" <td width=\"65%\">"); out.println(" <td width=\"65%\">");
out.println(" <p> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></p>"); out.println(" <p> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></p>");
out.println(" </td>"); out.println(" </td>");
out.println(" </tr>"); out.println(" </tr>");
out.println("</table>"); out.println("</table>");
out.println("<textarea rows=\"12\" name=\"parrafo\" cols=\"60\">"); out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
if (wantsTibetan) out.print(" class=\"tib\"");
// Paragraph should be empty if the user just clicked the clear button out.println(">");
// Paragraph should be empty if the user just clicked the clear button
answer = request.getParameter(buttonStr); answer = request.getParameter(buttonStr);
if (answer == null || answer != null && !answer.equals(clearStr)) if (answer == null || answer != null && !answer.equals(clearStr))
{ {
parrafo = request.getParameter("parrafo"); parrafo = request.getParameter("parrafo");
if (parrafo!=null) out.print(parrafo); if (parrafo!=null) out.print(parrafo);
} }
out.println("</textarea>");
out.println("</form>");
if (parrafo != null) out.println("</textarea>");
{ out.println("</form>");
sl.writeLog("Translation\tOnLineScannerFilter");
if (ds!=null && !ds.isEmpty()) if (parrafo != null)
desglosar(parrafo, out, wantsTibetan); {
} sl.writeLog("Translation\tOnLineScannerFilter");
else sl.writeLog("Invocation\tOnLineScannerFilter"); if (ds!=null && !ds.isEmpty())
desglosar(parrafo, out, wantsTibetan);
}
else sl.writeLog("Invocation\tOnLineScannerFilter");
out.println(TibetanScanner.copyrightHTML); out.println(TibetanScanner.copyrightHTML);
if (useTHDLBanner) out.println("</div><!--END main-->"); if (useTHDLBanner) out.println("</div><!--END main-->");
out.println("</body>"); out.println("</body>");
out.println("</html>"); out.println("</html>");
} }
/** Handles POST requests by passing them unchanged to doGet, which writes the
    whole page. The form printed by doGet is submitted with method=POST, so this
    is the path taken when the user submits the form. doGet is declared
    synchronized; this method itself is not.
    @param request the incoming request, forwarded as is
    @param response the response doGet writes the HTML to
*/
public void doPost(HttpServletRequest request, public void doPost(HttpServletRequest request,
HttpServletResponse response) HttpServletResponse response)
//throws IOException, ServletException //throws IOException, ServletException
{ {
doGet(request, response); doGet(request, response);
} }
synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan) synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan)
{ {
@ -298,22 +300,25 @@ public class OnLineScannerFilter extends HttpServlet
if (!in.equals("")) if (!in.equals(""))
{ {
/* while (hayMasLineas) /* while (hayMasLineas)
{ {
fin = in.indexOf("\n",init); fin = in.indexOf("\n",init);
if (fin<0) if (fin<0)
{ {
linea = in.substring(init).trim(); linea = in.substring(init).trim();
hayMasLineas=false; hayMasLineas=false;
} }
else else
linea = in.substring(init, fin).trim(); linea = in.substring(init, fin).trim();
scanner.scanBody(linea); scanner.scanBody(linea);
init = fin+1; init = fin+1;
} */ } */
scanner.clearTokens(); scanner.clearTokens();
in = Manipulate.NCR2UnicodeString(in);
if (Manipulate.guessIfUnicode(in)) in = Manipulate.unicodeToWylie(in);
else if (Manipulate.guessIfAcip(in)) in = Manipulate.acipToWylie(in);
scanner.scanBody(in); scanner.scanBody(in);
scanner.finishUp(); scanner.finishUp();
printText(pw, tibetan); printText(pw, tibetan);
@ -335,35 +340,35 @@ public class OnLineScannerFilter extends HttpServlet
for (i=0; i < words.length; i++) for (i=0; i < words.length; i++)
{ {
if (words[i] instanceof Word) if (words[i] instanceof Word)
{ {
word = new SwingWord((Word)words[i]); word = new SwingWord((Word)words[i]);
// if (word.getDefs().getDictionarySource()!=null) // if (word.getDefs().getDictionarySource()!=null)
pw.print(word.getLink()); pw.print(word.getLink(tibetan));
// else pw.print(word.getWylie() + " "); // else pw.print(word.getWylie() + " ");
} }
else else
{ {
if (words[i] instanceof PunctuationMark) if (words[i] instanceof PunctuationMark)
{ {
pm = words[i].toString().charAt(0); pm = words[i].toString().charAt(0);
switch (pm) switch (pm)
{ {
case '\n': case '\n':
pw.println("</p>"); pw.println("</p>");
pw.print("<p>"); pw.print("<p>");
break; break;
case '<': case '<':
pw.print("&lt; "); pw.print("&lt; ");
break; break;
case '>': case '>':
pw.print("&gt; "); pw.print("&gt; ");
break; break;
default: default:
pw.print(pm + " "); pw.print(pm + " ");
} }
} }
} }
} }
pw.println("</p>"); pw.println("</p>");
} }
@ -376,17 +381,17 @@ public class OnLineScannerFilter extends HttpServlet
String tag; String tag;
DictionarySource ds; DictionarySource ds;
ByteDictionarySource sourceb=null; ByteDictionarySource sourceb=null;
words = scanner.getWordArray(false); words = scanner.getWordArray(false);
if (words == null) if (words == null)
return; return;
pw.println("<table border=\"1\" width=\"100%\">"); pw.println("<table border=\"1\" width=\"100%\">");
for (j = 0; j < words.length; j++) { for (j = 0; j < words.length; j++) {
try { try {
word = new SwingWord(words[j]); word = new SwingWord(words[j]);
defs = word.getDefs(); defs = word.getDefs();
ds = defs.getDictionarySource(); ds = defs.getDictionarySource();
@ -400,19 +405,20 @@ public class OnLineScannerFilter extends HttpServlet
} }
else { else {
sourceb = (ByteDictionarySource) ds; sourceb = (ByteDictionarySource) ds;
k=0; k=0;
while (sourceb.isEmpty(k)) k++; while (sourceb.isEmpty(k)) k++;
tag = sourceb.getTag(k); tag = sourceb.getTag(k);
k++; k++;
} }
} }
pw.println(" <td width=\"20%\" rowspan=\"" + defs.def.length pw.print(" <td width=\"20%\" rowspan=\"" + defs.def.length
+ "\" valign=\"top\">" + word.getBookmark(tibetan) + "\" valign=\"top\"");
+ "</td>"); if (tibetan) pw.print(" class=\"tib\"");
pw.println(">" + word.getBookmark(tibetan) + "</td>");
pw.println(" <td width=\"12%\">" + tag + "</td>"); pw.println(" <td width=\"12%\">" + tag + "</td>");
pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>"); pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>");
pw.println(" </tr>"); pw.println(" </tr>");
for (i = 1; i < defs.def.length; i++) { for (i = 1; i < defs.def.length; i++) {
pw.println(" <tr>"); pw.println(" <tr>");
@ -421,9 +427,9 @@ public class OnLineScannerFilter extends HttpServlet
tag = ds.getTag(i); tag = ds.getTag(i);
} }
else { else {
while (sourceb.isEmpty(k)) k++; while (sourceb.isEmpty(k)) k++;
tag = sourceb.getTag(k); tag = sourceb.getTag(k);
k++; k++;
} }
pw.println(" <td width=\"12%\">" + tag + "</td>"); pw.println(" <td width=\"12%\">" + tag + "</td>");
@ -435,17 +441,17 @@ public class OnLineScannerFilter extends HttpServlet
sl.writeLog("Crash\tOnLineScannerFilter\t" + word.getWylie()); sl.writeLog("Crash\tOnLineScannerFilter\t" + word.getWylie());
sl.writeException(e); sl.writeException(e);
} }
} }
pw.println("</table>"); pw.println("</table>");
} }
/** Shutdown hook of the servlet: calls super.destroy(), clears the user IP held
    by the logger, writes a "Shutdown" log entry and then destroys the scanner.
    NOTE(review): scanner is only assigned inside a try block whose catch just
    logs and continues, so it looks like it can still be null here when the
    dictionary failed to load -- confirm and guard the call if so.
*/
public void destroy() public void destroy()
{ {
super.destroy(); super.destroy();
sl.setUserIP(null); sl.setUserIP(null);
sl.writeLog("Shutdown\tOnLineScannerFilter"); sl.writeLog("Shutdown\tOnLineScannerFilter");
scanner.destroy(); scanner.destroy();
} }
} }

View file

@ -1,20 +1,20 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
@ -29,127 +29,136 @@ import javax.swing.text.BadLocationException;
import org.thdl.tib.input.DuffPane; import org.thdl.tib.input.DuffPane;
import org.thdl.tib.text.TibetanDocument; import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.reverter.*;
import org.thdl.util.RTFFixerInputStream; import org.thdl.util.RTFFixerInputStream;
import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions; import org.thdl.util.ThdlOptions;
/** Identical to DuffPane except that it only supports Tibetan script in /** Identical to DuffPane except that it only supports Tibetan script in
TibetanMachineWeb. No roman script can be inputted. If roman script is TibetanMachineWeb. No roman script can be inputted. If roman script is
pasted, it is assumed that it is either ACIP or wylie and is converted pasted, it is assumed that it is either ACIP or wylie and is converted
accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted, accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted,
it is converted to TibetanMachineWeb. Any other font is assumed to be it is converted to TibetanMachineWeb. Any other font is assumed to be
Roman script. Roman script.
*/ */
public class StrictDuffPane extends DuffPane public class StrictDuffPane extends DuffPane
{ {
/** Builds the pane with the default DuffPane constructor and then calls
    disableRoman(), in line with the class comment that no roman script can
    be inputted.
*/
public StrictDuffPane() public StrictDuffPane()
{ {
super(); super();
disableRoman(); disableRoman();
} }
/** Smart paste! Automatically recognizes what is being pasted and converts /** Smart paste! Automatically recognizes what is being pasted and converts
respectively. Currently it supports pasting from TibetanMachineWeb, respectively. Currently it supports pasting from TibetanMachineWeb,
TibetanMachine, wylie, and ACIP. TibetanMachine, wylie, and ACIP.
*/ */
public void paste(int offset) public void paste(int offset)
{ {
// NOTE(review): each line of this listing appears to carry two versions of the
// method side by side (pre-change text first, post-change text second), and the
// two are out of step from the next line on; read one column at a time.
//
// Post-change flow, as far as the second column shows:
//  1. Return at once when the pane is not editable.
//  2. If the clipboard offers rtfFlavor, read it into a scratch TibetanDocument.
//     A failed read, or a first character whose font family does not start with
//     "TibetanMachine", only sets pasteAsString (and only when stringFlavor is
//     also offered). Otherwise text in the "TibetanMachine" family is first run
//     through convertToTMW, and the scratch document is inserted one character
//     at a time at offset, leaving out its final character.
//  3. If rtfFlavor is not offered but stringFlavor is, pasteAsString is set too.
//  4. When pasteAsString is set, the clipboard string is passed through
//     Converter.convertToEwtsForComputers if Manipulate.guessIfUnicode accepts it,
//     else through Manipulate.acipToWylie if Manipulate.guessIfAcip accepts it,
//     and the result is handed to toTibetanMachineWeb.
//
// NOTE(review): only the font of the character at position 0 is inspected when
// deciding how to treat the RTF content.
// NOTE(review): the errors buffer passed to convertToEwtsForComputers is not
// read afterwards in the visible code -- confirm that dropping it is intended.
// Respect setEditable(boolean): boolean pasteAsString = false;
if (!this.isEditable())
return; // Respect setEditable(boolean):
if (!this.isEditable())
try return;
{
Transferable contents = rtfBoard.getContents(this); try
{
if (contents.isDataFlavorSupported(rtfFlavor)){ Transferable contents = rtfBoard.getContents(this);
InputStream in = (InputStream)contents.getTransferData(rtfFlavor); if (contents.isDataFlavorSupported(rtfFlavor)){
int p1 = offset;
InputStream in = (InputStream)contents.getTransferData(rtfFlavor);
//construct new document that contains only portion of text you want to paste int p1 = offset;
TibetanDocument sd = new TibetanDocument();
//construct new document that contains only portion of text you want to paste
// I swear this happened once when I pasted in some TibetanDocument sd = new TibetanDocument();
// random junk just after Jskad started up.
ThdlDebug.verify(null != in); // I swear this happened once when I pasted in some
// random junk just after Jskad started up.
boolean errorReading = false; ThdlDebug.verify(null != in);
try boolean errorReading = false;
{
if (!ThdlOptions.getBooleanOption("thdl.do.not.fix.rtf.hex.escapes")) try
in = new RTFFixerInputStream(in); {
rtfEd.read(in, sd, 0); if (!ThdlOptions.getBooleanOption("thdl.do.not.fix.rtf.hex.escapes"))
} catch (Exception e) { in = new RTFFixerInputStream(in);
rtfEd.read(in, sd, 0);
errorReading = true; } catch (Exception e) {
/* If fonts weren't supported and we don't know what it is try to paste errorReading = true;
ACIP or wylie.
*/ /* If fonts weren't supported and we don't know what it is try to paste
if (contents.isDataFlavorSupported(DataFlavor.stringFlavor)) ACIP or wylie.
{ */
String data = (String)contents.getTransferData(DataFlavor.stringFlavor); if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); {
toTibetanMachineWeb(data, offset); pasteAsString = true;
} }
// JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop."); // JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop.");
} }
if (!errorReading) if (!errorReading)
{ {
/* If it is any font beside TibetanMachine and TibetanMachineWeb /* If it is any font beside TibetanMachine and TibetanMachineWeb
assume it is wylie or Acip. assume it is wylie or Acip.
*/ */
if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine") if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine")
&& contents.isDataFlavorSupported(DataFlavor.stringFlavor)) && contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{ {
String data = (String)contents.getTransferData(DataFlavor.stringFlavor); pasteAsString = true;
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); }
toTibetanMachineWeb(data, offset); else
} {
else // If its font is TibetanMachine, convert to TibetanMachineWeb first
{ if (sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().equals("TibetanMachine"))
// If its font is TibetanMachine, convert to TibetanMachineWeb first {
if (sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().equals("TibetanMachine")) StringBuffer errors = new StringBuffer();
{ long numAttemptedReplacements[] = new long[] { 0 };
StringBuffer errors = new StringBuffer(); sd.convertToTMW(0, -1, errors, numAttemptedReplacements);
long numAttemptedReplacements[] = new long[] { 0 }; }
sd.convertToTMW(0, -1, errors, numAttemptedReplacements);
} for (int i=0; i<sd.getLength()-1; i++) { //getLength()-1 so that final newline is not included in paste
try
for (int i=0; i<sd.getLength()-1; i++) { //getLength()-1 so that final newline is not included in paste {
try String s = sd.getText(i,1);
{ AttributeSet as = sd.getCharacterElement(i).getAttributes();
String s = sd.getText(i,1); getTibDoc().insertString(p1+i, s, as);
AttributeSet as = sd.getCharacterElement(i).getAttributes(); } catch (BadLocationException ble)
getTibDoc().insertString(p1+i, s, as); {
} catch (BadLocationException ble) ble.printStackTrace();
{ ThdlDebug.noteIffyCode();
ble.printStackTrace(); }
ThdlDebug.noteIffyCode(); }
} }
} }
} }
} else if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
} {
else if (contents.isDataFlavorSupported(DataFlavor.stringFlavor)) // if it is not in a font, assume it is wylie or ACIP.
{ pasteAsString = true;
// if it is not in a font, assume it is wylie or ACIP. }
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); if (pasteAsString)
toTibetanMachineWeb(data, offset); {
} String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
} catch (UnsupportedFlavorException ufe) { if (Manipulate.guessIfUnicode(data))
ufe.printStackTrace(); {
ThdlDebug.noteIffyCode(); StringBuffer errors = new StringBuffer();
} catch (IOException ioe) { data = Converter.convertToEwtsForComputers(data, errors);
ioe.printStackTrace(); } else if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
ThdlDebug.noteIffyCode(); toTibetanMachineWeb(data, offset);
} catch (IllegalStateException ise) { }
ise.printStackTrace();
ThdlDebug.noteIffyCode(); } catch (UnsupportedFlavorException ufe) {
} ufe.printStackTrace();
} ThdlDebug.noteIffyCode();
} catch (IOException ioe) {
ioe.printStackTrace();
ThdlDebug.noteIffyCode();
} catch (IllegalStateException ise) {
ise.printStackTrace();
ThdlDebug.noteIffyCode();
}
}
} }

View file

@ -20,7 +20,8 @@ Contributor(s): ______________________________________.
to store the dictionary. */ to store the dictionary. */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import org.thdl.tib.text.TibetanHTML; //import org.thdl.tib.text.TibetanHTML;
import org.thdl.tib.text.ttt.*;
/** Tibetan word with its corresponding definitions. /** Tibetan word with its corresponding definitions.
@ -60,7 +61,8 @@ public class SwingWord extends Word
{ {
try try
{ {
localWord = TibetanHTML.getHTML(super.token + " "); // localWord = TibetanHTML.getHTML(super.token + " ");
localWord = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(super.token + " "));
} }
catch (Exception e) catch (Exception e)
{ {
@ -82,6 +84,7 @@ public class SwingWord extends Word
public String getLink(boolean tibetan) public String getLink(boolean tibetan)
{ {
String localWord, result=null; String localWord, result=null;
String className = "";
if (wordSinDec==null) localWord = super.token; if (wordSinDec==null) localWord = super.token;
else localWord = wordSinDec; else localWord = wordSinDec;
@ -89,7 +92,8 @@ public class SwingWord extends Word
{ {
try try
{ {
result = TibetanHTML.getHTML(localWord + " "); result = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(localWord + " "));
className = " class = \"tib\"";
} }
catch (Exception e) catch (Exception e)
{ {
@ -101,6 +105,6 @@ public class SwingWord extends Word
if (tibetan) result+= "</a>"; if (tibetan) result+= "</a>";
else result+= "</a> "; else result+= "</a> ";
return result;*/ return result;*/
return "<a href=\"#" + super.token + "\">" + result + "</a> "; return "<a href=\"#" + super.token + "\"" + className + ">" + result + "</a> ";
} }
} }

View file

@ -27,7 +27,7 @@ import org.thdl.util.ThdlVersion;
*/ */
public abstract class TibetanScanner public abstract class TibetanScanner
{ {
public static final String version = "The Tibetan to English Translation Tool, version 3.2.1 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved."; public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2005 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>"; public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2005 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";