Updated the translation tool to accept Tibetan Unicode. For the application version, this means that the smart paste is Unicode-aware and will do the appropriate conversion. In the servlet version, Tibetan Unicode can now be entered in the form, and Tibetan Machine Uni is used to display the results.

The Manipulate class now includes: acipToWylie, wylieToAcip, and unicodeToWylie. They provide a simple interface to David Chandler's converters that are used by the translation tool.
This commit is contained in:
amontano 2006-04-24 06:09:17 +00:00
parent 2a47ff68a4
commit 67bddb7a7e
6 changed files with 851 additions and 717 deletions

View file

@ -1,44 +1,44 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.util.Enumeration;
import java.util.Vector;
/** Loads dictionary stored in tree format and searches for words recursively.
How the the dictionary is loaded depends on which implementation of
{@link SyllableListTree} is invoked.
@author Andrés Montano Pellegrini
@see SyllableListTree
*/
How the the dictionary is loaded depends on which implementation of
{@link SyllableListTree} is invoked.
@author Andrés Montano Pellegrini
@see SyllableListTree
*/
public class LocalTibetanScanner extends TibetanScanner
{
public static String archivo;
private SyllableListTree raiz, silActual, lastCompSil, silAnterior;
private String wordActual, lastCompWord;
private Vector floatingSil;
static
{
archivo = null;
}
public BitDictionarySource getDictionarySource()
{
return raiz.getDictionarySourcesWanted();
@ -46,12 +46,12 @@ public class LocalTibetanScanner extends TibetanScanner
public LocalTibetanScanner(String arch) throws Exception
{
this (arch, true);
this (arch, true);
}
public LocalTibetanScanner(String arch, boolean backwardCompatible) throws Exception
{
super();
super();
archivo = arch;
// raiz = new MemorySyllableListTree(archivo);
// raiz = new FileSyllableListTree(archivo);
@ -59,13 +59,13 @@ public class LocalTibetanScanner extends TibetanScanner
floatingSil = new Vector();
resetAll();
}
/** Nulls out every syllable-tree cursor and word buffer held by the scanner.
    After this call scanSyllable sees silActual == null and restarts its
    lookup from the root of the tree. */
private void resetAll()
{
    // Tree cursors.
    lastCompSil = null;
    silActual = null;
    silAnterior = null;

    // Text accumulated for the word in progress and the last complete word.
    lastCompWord = null;
    wordActual = null;
}
private void scanSyllable(String sil)
{
SyllableListTree resultado=null;
@ -73,13 +73,13 @@ public class LocalTibetanScanner extends TibetanScanner
Word w;
String silSinDec;
boolean aadded;
if (silActual==null)
silActual = raiz;
silAnterior = silActual;
silActual = silActual.lookUp(sil);
if (silActual != null)
{
if (silActual.hasDef())
@ -113,9 +113,9 @@ public class LocalTibetanScanner extends TibetanScanner
}
else
{
resultado = null;
if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1);
silSinDec = withOutDec(silSinDec);
resultado = null;
if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1);
silSinDec = withOutDec(silSinDec);
}
}
if (resultado!=null) return;
@ -134,9 +134,9 @@ public class LocalTibetanScanner extends TibetanScanner
{
resultado = silAnterior.lookUp(silSinDec);
/* here we don't have to worry about being in the middle of a
word since the declension marks that it is the end of a
word.
*/
word since the declension marks that it is the end of a
word.
*/
if (resultado == null || !resultado.hasDef())
{
silSinDec += "\'";
@ -153,26 +153,26 @@ public class LocalTibetanScanner extends TibetanScanner
}
else
{
resultado = null;
if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1);
silSinDec = withOutDec(silSinDec);
resultado = null;
if (aadded) silSinDec = silSinDec.substring(0, silSinDec.length()-1);
silSinDec = withOutDec(silSinDec);
}
}
if (resultado!=null) return;
if (lastCompSil!=null)
{
if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs());
if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs());
else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs());
wordList.addLast(w);
this.resetAll();
enumeration = floatingSil.elements();
floatingSil = new Vector();
while (enumeration.hasMoreElements())
scanSyllable((String)enumeration.nextElement());
scanSyllable(sil);
}
else
@ -193,32 +193,32 @@ public class LocalTibetanScanner extends TibetanScanner
}
}
}
/** Flushes whatever the scanner is still holding into wordList.
While a complete word is pending (lastCompSil != null) it is appended to
wordList, the state is reset, and the syllables parked in floatingSil are
fed back through scanSyllable. If a partial match is still open afterwards
(silActual != null) it is appended as an "[incomplete word]" entry.
*/
public void finishUp()
{
Enumeration enumeration;
Word w;
// Repeats as long as a complete word is pending; re-scanning the floating
// syllables below can presumably leave a new one pending.
while (lastCompSil!=null)
{
// Two-argument Word when the text scanned so far is exactly the last
// complete word; otherwise the three-argument form also receives wordActual.
if (lastCompWord.equals(wordActual)) w = new Word(lastCompWord, lastCompSil.getDefs());
else w = new Word(lastCompWord, wordActual, lastCompSil.getDefs());
wordList.addLast(w);
this.resetAll();
// Take the current floating syllables and install a fresh Vector BEFORE
// re-scanning, so the enumeration is over the old list only.
enumeration = floatingSil.elements();
floatingSil = new Vector();
while (enumeration.hasMoreElements())
scanSyllable((String)enumeration.nextElement());
}
// No complete word is pending, but a lookup is still in progress: record the
// text scanned so far with a placeholder definition and clear the state.
if (silActual!=null)
{
wordList.addLast(new Word(wordActual, "[incomplete word]"));
this.resetAll();
}
}
private static String concatWithSpace(String s1, String s2)
{
if (s1==null || s1.equals(""))
@ -226,14 +226,14 @@ public class LocalTibetanScanner extends TibetanScanner
else
return s1 + ' ' + s2;
}
private static String withOutDec(String sil)
{
boolean isDeclined =false;
int len = sil.length(), apos;
if (len<3) return null;
char lastCar = Character.toLowerCase(sil.charAt(len-1));
if ((lastCar == 's' || lastCar == 'r') && Manipulate.isVowel(sil.charAt(len-2)))
{
@ -242,19 +242,19 @@ public class LocalTibetanScanner extends TibetanScanner
}
else
{
apos = sil.lastIndexOf('\'');
if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u')
{
isDeclined=true;
sil = sil.substring(0, apos);
}
/* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'')
{
isDeclined=true;
sil = sil.substring(0, len-2);
}*/
apos = sil.lastIndexOf('\'');
if (apos>0 && apos < len-1 && Manipulate.isVowel(sil.charAt(apos-1)) && sil.charAt(apos+1)!='u')
{
isDeclined=true;
sil = sil.substring(0, apos);
}
/* if ((lastCar == 'i' || lastCar == 'o') && sil.charAt(len-2)=='\'')
{
isDeclined=true;
sil = sil.substring(0, len-2);
}*/
}
if (!isDeclined) return null;
return sil;
}
@ -262,127 +262,127 @@ public class LocalTibetanScanner extends TibetanScanner
public void scanBody(String in)
{
boolean hayMasLineas=true;
if (in.equals("")) finishUp();
else
{
int init = 0, fin;
String linea;
while (hayMasLineas)
{
fin = in.indexOf("\n",init);
if (fin<0)
{
linea = in.substring(init).trim();
hayMasLineas=false;
linea = in.substring(init).trim();
hayMasLineas=false;
}
else
linea = in.substring(init, fin).trim();
linea = in.substring(init, fin).trim();
if (linea.equals(""))
{
finishUp();
wordList.addLast(new PunctuationMark('\n'));
finishUp();
wordList.addLast(new PunctuationMark('\n'));
}
else
scanLine(linea);
scanLine(linea);
init = fin+1;
}
}
}
public void scanLine(String linea)
{
int init = 0, fin;
char ch;
String sil;
boolean doNotFinishUp;
if (linea.equals(""))
{
finishUp();
wordList.addLast(new PunctuationMark('\n'));
return;
finishUp();
wordList.addLast(new PunctuationMark('\n'));
return;
}
outAHere:
while(true)
{
doNotFinishUp=true;
// Make init skip all punctuation marks
while (true)
outAHere:
while(true)
{
if (init>=linea.length())
break outAHere;
ch = linea.charAt(init);
if (Manipulate.isPunctuationMark(ch))
{
if (doNotFinishUp)
{
finishUp();
doNotFinishUp=false;
}
wordList.addLast(new PunctuationMark(ch));
}
else if (!Manipulate.isEndOfSyllableMark(ch))
break;
init++;
}
doNotFinishUp = true;
/* move fin to the end of the next syllable. If finishing
up is necessary it is done after scanSyllable
*/
fin = init+1;
while (fin < linea.length())
{
ch = linea.charAt(fin);
if (Manipulate.isPunctuationMark(ch))
{
doNotFinishUp = false;
break;
}
else if (Manipulate.isEndOfSyllableMark(ch))
{
break;
}
else
{
fin++;
if (fin>=linea.length())
break;
doNotFinishUp=true;
// Make init skip all punctuation marks
while (true)
{
if (init>=linea.length())
break outAHere;
ch = linea.charAt(init);
if (Manipulate.isPunctuationMark(ch))
{
if (doNotFinishUp)
{
finishUp();
doNotFinishUp=false;
}
wordList.addLast(new PunctuationMark(ch));
}
else if (!Manipulate.isEndOfSyllableMark(ch))
break;
init++;
}
doNotFinishUp = true;
/* move fin to the end of the next syllable. If finishing
up is necessary it is done after scanSyllable
*/
fin = init+1;
while (fin < linea.length())
{
ch = linea.charAt(fin);
if (Manipulate.isPunctuationMark(ch))
{
doNotFinishUp = false;
break;
}
else if (Manipulate.isEndOfSyllableMark(ch))
{
break;
}
else
{
fin++;
if (fin>=linea.length())
break;
}
}
sil = linea.substring(init, fin);
scanSyllable(sil);
if (!doNotFinishUp)
{
finishUp();
wordList.addLast(new PunctuationMark(ch));
}
init = fin+1;
}
sil = linea.substring(init, fin);
scanSyllable(sil);
if (!doNotFinishUp)
{
finishUp();
wordList.addLast(new PunctuationMark(ch));
}
init = fin+1;
}
}
/** Looks for .dic file, and returns the dictionary descriptions.
Also updates the definitionTags in the Definitions class.
*/
public String[] getDictionaryDescriptions()
{
return FileSyllableListTree.getDictionaryDescriptions(archivo);
}
public void destroy()
{
FileSyllableListTree.closeFiles();
}
/** Looks for .dic file, and returns the dictionary descriptions.
Also updates the definitionTags in the Definitions class.
*/
public String[] getDictionaryDescriptions()
{
return FileSyllableListTree.getDictionaryDescriptions(archivo);
}
public void destroy()
{
FileSyllableListTree.closeFiles();
}
}

View file

@ -17,6 +17,10 @@ Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import org.thdl.tib.text.*;
import org.thdl.tib.text.reverter.*;
/** Miscellaneous static methods for the manipulation of Tibetan text.
@author Andr&eacute;s Montano Pellegrini
@ -24,7 +28,6 @@ package org.thdl.tib.scanner;
public class Manipulate
{
private static String endOfParagraphMarks = "/;|!:^@#$%=";
private static String bracketMarks = "<>(){}[]";
private static String endOfSyllableMarks = " _\t";
@ -177,70 +180,7 @@ public class Manipulate
{
ch = Character.toLowerCase(ch);
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
}
public static String wylieToAcip(String palabra)
{
// DLC FIXME: for unknown things, return null.
if (palabra.equals("@##")) return "#";
if (palabra.equals("@#")) return "*";
if (palabra.equals("!")) return "`";
if (palabra.equals("b+h")) return "BH";
if (palabra.equals("d+h")) return "DH";
if (palabra.equals("X")) return null;
if (palabra.equals("iA")) return null;
if (palabra.equals("ai")) return "EE";
if (palabra.equals("au")) return "OO";
if (palabra.equals("$")) return null;
if (palabra.startsWith("@") || palabra.startsWith("#"))
return null; // we can't convert this in isolation! We need context.
char []caract;
int i, j, len;
String nuevaPalabra;
caract = palabra.toCharArray();
len = palabra.length();
for (j=0; j<len; j++)
{
i = j;
/*ciclo:
while(true) // para manejar excepciones; que honda!
{
switch(caract[i])
{
case 'A':
if (i>0)
{
i--;
break;
}
default:*/
if (Character.isLowerCase(caract[i]))
caract[i] = Character.toUpperCase(caract[i]);
else if (Character.isUpperCase(caract[i]))
caract[i] = Character.toLowerCase(caract[i]);
/* break ciclo;
}
}*/
}
nuevaPalabra = new String(caract);
// nuevaPalabra = palabra.toUpperCase();
// ahora hacer los cambios de Michael Roach
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
nuevaPalabra = replace(nuevaPalabra, "/", ",");
nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = replace(nuevaPalabra, "|", ";");
nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra;
}
}
/** If more than half of the letters among the first 10 characters
are uppercase, assume it is ACIP. */
@ -263,125 +203,30 @@ public class Manipulate
else return (letters / upperCase < 2);
}
public static String acipToWylie(String linea)
public static boolean isTibetanUnicodeCharacter(char ch)
{
char caract[], ch, chP, chN;
String nuevaLinea;
int i, len;
boolean open;
caract = linea.toCharArray();
len = linea.length();
for (i=0; i<len; i++)
{
if (Character.isLowerCase(caract[i]))
caract[i] = Character.toUpperCase(caract[i]);
else if (Character.isUpperCase(caract[i]))
caract[i] = Character.toLowerCase(caract[i]);
}
nuevaLinea = new String(caract);
/* ahora hacer los cambios de Michael Roach ts -> tsh, tz -> ts, v -> w,
TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h,
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */
nuevaLinea = replace(nuevaLinea, "ts", "tq");
nuevaLinea = replace(nuevaLinea, "tz", "ts");
nuevaLinea = replace(nuevaLinea, "tq", "tsh");
nuevaLinea = replace(nuevaLinea, "v", "w");
nuevaLinea = replace(nuevaLinea, "TH", "Th");
nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
nuevaLinea = replace(nuevaLinea, ":", "H");
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
nuevaLinea = replace(nuevaLinea, "dzh", "dz+h");
nuevaLinea = replace(nuevaLinea, "aa", "a");
nuevaLinea = replace(nuevaLinea, "ai", "i");
nuevaLinea = replace(nuevaLinea, "aee", "ai");
nuevaLinea = replace(nuevaLinea, "au", "u");
nuevaLinea = replace(nuevaLinea, "aoo", "au");
nuevaLinea = replace(nuevaLinea, "ae", "e");
nuevaLinea = replace(nuevaLinea, "ao", "o");
nuevaLinea = replace(nuevaLinea, "ee", "ai");
nuevaLinea = replace(nuevaLinea, "oo", "au");
nuevaLinea = replace(nuevaLinea, "\'I", "\'q");
nuevaLinea = replace(nuevaLinea, "I", "-i");
nuevaLinea = replace(nuevaLinea, "\'q", "-I");
nuevaLinea = replace(nuevaLinea, "\\", "?");
nuevaLinea = replace(nuevaLinea, "`", "!");
nuevaLinea = replace(nuevaLinea, "ga-y", "g.y");
nuevaLinea = replace(nuevaLinea, "g-y", "g.y");
nuevaLinea = replace(nuevaLinea, "na-y", "n+y");
len = nuevaLinea.length();
for (i=0; i<len; i++)
{
ch = nuevaLinea.charAt(i);
switch(ch)
{
case '#':
nuevaLinea = nuevaLinea.substring(0,i) + "@##" + nuevaLinea.substring(i+1);
i+=3;
len+=2;
break;
case '*':
nuevaLinea = nuevaLinea.substring(0,i) + "@#" + nuevaLinea.substring(i+1);
i+=2;
len++;
break;
case '\'':
if (i>0 && i<len-1)
{
chP = nuevaLinea.charAt(i-1);
chN = nuevaLinea.charAt(i+1);
if (isVowel(chN))
{
if (Character.isLetter(chP) && !isVowel(chP))
{
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len--;
}
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
{
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len-=2;
}
}
}
}
}
open = false;
for (i=0; i<len; i++)
{
ch = nuevaLinea.charAt(i);
if (ch=='/')
{
if (open)
{
nuevaLinea = nuevaLinea.substring(0, i) + ")" + nuevaLinea.substring(i+1);
open = false;
}
else
{
nuevaLinea = nuevaLinea.substring(0, i) + "(" + nuevaLinea.substring(i+1);
open = true;
}
}
}
nuevaLinea = replace(nuevaLinea, ",", "/");
return nuevaLinea;
return ch>=0xF00 && ch<=0xFFF;
}
/** Guesses whether a line is Tibetan Unicode by sampling at most its first
    10 characters.
    @param line text to inspect
    @return true if more than half of the letters in the sample are accepted
    by {@link #isTibetanUnicodeCharacter(char)}; false otherwise, including
    when the sample holds no letters or no such characters
*/
public static boolean guessIfUnicode(String line)
{
    int sampleSize = Math.min(line.length(), 10);
    int letterCount = 0, tibetanCount = 0;

    for (int pos = 0; pos < sampleSize; pos++)
    {
        char current = line.charAt(pos);
        // Only characters that Java classifies as letters take part in the vote.
        if (!Character.isLetter(current)) continue;
        letterCount++;
        if (isTibetanUnicodeCharacter(current)) tibetanCount++;
    }

    if (letterCount == 0 || tibetanCount == 0) return false;

    // Integer division on purpose: the quotient is below 2 exactly when
    // tibetanCount is more than half of letterCount.
    return letterCount / tibetanCount < 2;
}
public static String fixWazur(String linea)
{
int i;
@ -529,5 +374,275 @@ public class Manipulate
System.out.println(palabra + '\t' + definicion);
}
if (psPalabras!=null) psPalabras.flush();
}*/
}*/
/** Converts ACIP transliteration to Wylie by loading the text into a
    {@link TibetanDocument} and reading the document back as Wylie.
    @param acip text in ACIP transliteration
    @return the Wylie reported by the document, or null if loading the text
    raises an InvalidTransliterationException
*/
public static String acipToWylie(String acip)
{
    TibetanDocument doc = new TibetanDocument();
    boolean loaded;

    try
    {
        // NOTE(review): the leading false presumably selects the ACIP parser
        // (unicodeToWylie passes true for its EWTS input) -- confirm against
        // TibTextUtils.
        TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, doc, 0, false);
        loaded = true;
    }
    catch (InvalidTransliterationException e)
    {
        loaded = false;
    }

    return loaded ? doc.getWylie(new boolean[] { false }) : null;
}
/** Converts Wylie (EWTS) transliteration to ACIP by loading the text into a
    {@link TibetanDocument} and reading the document back as ACIP.
    @param wylie text in Wylie transliteration
    @return the ACIP reported by the document, or null if loading the text
    raises an InvalidTransliterationException
*/
public static String wylieToAcip(String wylie)
{
    TibetanDocument tibDoc = new TibetanDocument();
    try
    {
        // The input is Wylie, so it has to be parsed as EWTS: first argument
        // true, as unicodeToWylie does for its EWTS input. Passing false (the
        // value acipToWylie uses for ACIP input) made this method read Wylie
        // as if it were ACIP.
        // NOTE(review): assumes the first parameter means "EWTS, not ACIP" --
        // confirm against TibTextUtils.
        TibTextUtils.insertTibetanMachineWebForTranslit(true, wylie, tibDoc, 0, false);
    }
    catch (InvalidTransliterationException e)
    {
        return null;
    }
    return tibDoc.getACIP(new boolean[] { false });
}
/** Converts Tibetan Unicode text to Wylie in two steps: the text is first
    turned into machine-oriented EWTS by {@link Converter}, then that EWTS is
    loaded into a {@link TibetanDocument} and read back as Wylie.
    @param unicode Tibetan text in Unicode
    @return the Wylie reported by the document, or null if loading the
    intermediate EWTS raises an InvalidTransliterationException
*/
public static String unicodeToWylie(String unicode)
{
    TibetanDocument doc = new TibetanDocument();
    // Handed to the converter (presumably as its error sink); never read here.
    StringBuffer conversionErrors = new StringBuffer();
    String ewts = Converter.convertToEwtsForComputers(unicode, conversionErrors);

    boolean loaded = true;
    try
    {
        TibTextUtils.insertTibetanMachineWebForTranslit(true, ewts, doc, 0, false);
    }
    catch (InvalidTransliterationException e)
    {
        loaded = false;
    }

    if (!loaded) return null;
    return doc.getWylie(new boolean[] { false });
}
/** Replaces numeric character references (<code>&amp;#NNN;</code> decimal,
    <code>&amp;#xHHH;</code> or <code>&amp;#XHHH;</code> hexadecimal) with the
    characters they denote. Adapted from
    http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html
    <p>Text outside references is copied unchanged. A "&amp;#" with no
    following ';' is copied literally together with the rest of the string.
    A reference whose body is empty, is not a number in the selected radix,
    or is not a valid Unicode code point is replaced by '?'. Code points
    above U+FFFF are emitted as surrogate pairs instead of being truncated
    to 16 bits.
    @param str text possibly containing numeric character references
    @return the text with every reference decoded
*/
public static String NCR2UnicodeString(String str)
{
    StringBuffer ostr = new StringBuffer(str.length());
    int pos = 0;
    while (pos < str.length())
    {
        int start = str.indexOf("&#", pos);
        if (start == -1)
        {
            // No more references: copy the tail and stop.
            ostr.append(str.substring(pos));
            break;
        }
        ostr.append(str.substring(pos, start));

        int end = str.indexOf(";", start);
        if (end == -1)
        {
            // Unterminated reference: keep it verbatim.
            ostr.append(str.substring(start));
            break;
        }

        // Trim once so the prefix test and the number parse see the same text.
        String tok = str.substring(start + 2, end).trim();
        int radix = 10;
        // The length guard keeps "&#;" from throwing on charAt(0).
        if (tok.length() > 0 && (tok.charAt(0) == 'x' || tok.charAt(0) == 'X'))
        {
            radix = 16;
            tok = tok.substring(1);
        }

        try
        {
            int codePoint = Integer.parseInt(tok, radix);
            if (Character.isValidCodePoint(codePoint))
                ostr.appendCodePoint(codePoint);
            else
                ostr.append('?');
        }
        catch (NumberFormatException exp)
        {
            // Covers the empty token as well as non-numeric bodies.
            ostr.append('?');
        }
        pos = end + 1;
    }
    return ostr.toString();
}
/** Encodes every character of a string, ASCII included, as a decimal
    numeric character reference (<code>&amp;#NNN;</code>).
    <p>The string is walked by Unicode code point, so a character outside the
    Basic Multilingual Plane yields one reference to its real code point
    rather than two references to its surrogate halves, which are not valid
    character references in HTML.
    @param str text to encode
    @return the concatenated references; empty if str is empty
*/
public static String UnicodeString2NCR(String str)
{
    StringBuffer ncr = new StringBuffer(str.length() * 7);
    int i = 0;
    while (i < str.length())
    {
        int codePoint = str.codePointAt(i);
        ncr.append("&#").append(codePoint).append(';');
        // Advance by 2 UTF-16 units for a surrogate pair, 1 otherwise.
        i += Character.charCount(codePoint);
    }
    return ncr.toString();
}
}

View file

@ -1,20 +1,20 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
@ -29,24 +29,24 @@ import javax.servlet.http.HttpServletResponse;
import org.thdl.util.ThdlOptions;
/** Interface to provide access to an on-line dictionary through a form in html;
Inputs Tibetan text (Roman script only) and displays the
words (Roman or Tibetan script) with their definitions.
Runs on the server and is called upon through an HTTP request directly
by the browser. Requires no additional software installed on the client.
@author Andr&eacute;s Montano Pellegrini
*/
Inputs Tibetan text (Roman script only) and displays the
words (Roman or Tibetan script) with their definitions.
Runs on the server and is called upon through an HTTP request directly
by the browser. Requires no additional software installed on the client.
@author Andr&eacute;s Montano Pellegrini
*/
public class OnLineScannerFilter extends HttpServlet
{
private final static String propertyFile = "dictionary";
private final static String dictNameProperty = "onlinescannerfilter.dict-file-name";
private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff";
private final static String clearStr = "Clear";
private final static String buttonStr = "button";
private final static String scriptStr = "script";
private final static String tibetanStr = "tibetan";
ResourceBundle rb;
private final static String propertyFile = "dictionary";
private final static String dictNameProperty = "onlinescannerfilter.dict-file-name";
private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff";
private final static String clearStr = "Clear";
private final static String buttonStr = "button";
private final static String scriptStr = "script";
private final static String tibetanStr = "tibetan";
ResourceBundle rb;
private TibetanScanner scanner;
private String dictionaries[];
private ScannerLogger sl;
@ -55,150 +55,150 @@ public class OnLineScannerFilter extends HttpServlet
{
rb = ResourceBundle.getBundle(propertyFile);
sl = new ScannerLogger();
try
{
scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false);
scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false);
}
catch (Exception e)
{
sl.writeLog("Crash\tOnLineScannerFilter");
sl.writeException(e);
sl.writeLog("Crash\tOnLineScannerFilter");
sl.writeException(e);
}
dictionaries = scanner.getDictionaryDescriptions();
sl.writeLog("Creation\tOnLineScannerFilter");
}
synchronized public void doGet(HttpServletRequest request,
HttpServletResponse response) //throws IOException, ServletException
{
synchronized public void doGet(HttpServletRequest request,
HttpServletResponse response) //throws IOException, ServletException
{
String answer, parrafo = null, checkboxName;
// if this line is included in the constructor, it works on the orion server but not on wyllie!
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
response.setContentType("text/html");
PrintWriter out;
sl.setUserIP(request.getRemoteAddr());
try
{
out = response.getWriter();
}
// if this line is included in the constructor, it works on the orion server but not on wyllie!
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
response.setContentType("text/html");
PrintWriter out;
sl.setUserIP(request.getRemoteAddr());
try
{
out = response.getWriter();
}
catch (Exception e)
{
sl.writeLog("Crash\tOnLineScannerFilter");
sl.writeException(e);
return;
sl.writeLog("Crash\tOnLineScannerFilter");
sl.writeException(e);
return;
}
BitDictionarySource ds=null;
boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null);
// int percent=100;
out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
out.println("<head>");
out.println("<head>");
if (useTHDLBanner)
{
out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>");
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
out.println(" <script type=\"text/javascript\" src=\"/thdl/scripts/thdl_scripts.js\"></script>");
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"/thdl/style/thdl-styles.css\"/>");
}
else
out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>");
out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">");
out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">");
out.println(" <meta name=\"MSSmartTagsPreventParsing\" content=\"TRUE\">");
out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>");
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
}
else
out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>");
out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">");
out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">");
out.println(" <meta name=\"MSSmartTagsPreventParsing\" content=\"TRUE\">");
answer = request.getParameter(scriptStr);
/* script==null || makes default tibetan
script!=null && makes default roman
*/
wantsTibetan = (answer==null || answer.equals(tibetanStr));
/*if (wantsTibetan)
{
out.println("<style>.tmw {font: 28pt TibetanMachineWeb}");
out.println(".tmw1 {font: 28pt TibetanMachineWeb1}");
out.println(".tmw2 {font: 28pt TibetanMachineWeb2}");
out.println(".tmw3 {font: 28pt TibetanMachineWeb3}");
out.println(".tmw4 {font: 28pt TibetanMachineWeb4}");
out.println(".tmw5 {font: 28pt TibetanMachineWeb5}");
out.println(".tmw6 {font: 28pt TibetanMachineWeb6}");
out.println(".tmw7 {font: 28pt TibetanMachineWeb7}");
out.println(".tmw8 {font: 28pt TibetanMachineWeb8}");
out.println(".tmw9 {font: 28pt TibetanMachineWeb9}");
out.println("</style>");
}*/
out.println("</head>");
out.println("<body>");
answer = request.getParameter(scriptStr);
/* script==null || makes default tibetan
script!=null && makes default roman
*/
wantsTibetan = (answer==null || answer.equals(tibetanStr));
if (wantsTibetan)
{
out.println("<style>.tmw {font: 28pt TibetanMachineWeb}");
out.println(".tmw1 {font: 28pt TibetanMachineWeb1}");
out.println(".tmw2 {font: 28pt TibetanMachineWeb2}");
out.println(".tmw3 {font: 28pt TibetanMachineWeb3}");
out.println(".tmw4 {font: 28pt TibetanMachineWeb4}");
out.println(".tmw5 {font: 28pt TibetanMachineWeb5}");
out.println(".tmw6 {font: 28pt TibetanMachineWeb6}");
out.println(".tmw7 {font: 28pt TibetanMachineWeb7}");
out.println(".tmw8 {font: 28pt TibetanMachineWeb8}");
out.println(".tmw9 {font: 28pt TibetanMachineWeb9}");
out.println("</style>");
}
out.println("</head>");
out.println("<body>");
if (useTHDLBanner)
{
out.println("<script type=\"text/javascript\" src=\"/thdl/scripts/banner.js\"></script>");
out.println("<div id=\"sub_banner\">");
out.println("<div id=\"search\">");
out.println(" <form method=\"get\" action=\"http://www.google.com/u/thdl\">");
out.println(" <p>");
out.println(" <input type=\"text\" name=\"q\" id=\"q\" size=\"15\" maxlength=\"255\" value=\"\" />");
out.println(" <input type=\"submit\" name=\"sa\" id=\"sa\" value=\"Search\"/>");
out.println(" <input type=\"hidden\" name=\"hq\" id=\"hq\" value=\"inurl:orion.lib.virginia.edu\"/>");
out.println(" </p>");
out.println(" </form>");
out.println(" </div>");
out.println(" <div id=\"breadcrumbs\">");
out.println(" <a href=\"/thdl/index.html\">Home</a> &gt; <a href=\"/thdl/reference/index.html\">Reference</a> &gt; Translation Tool");
out.println(" </div>");
out.println("</div><!--END sub_banner-->");
out.println("<div id=\"main\">");
}
out.println("<h3 align=\"center\">The Online Tibetan to English Translation/Dictionary Tool</h3>");
try
{
out.println(rb.getString(otherLinksProperty));
}
catch (MissingResourceException e)
{
// do nothing
}
out.println("<script type=\"text/javascript\" src=\"/thdl/scripts/banner.js\"></script>");
out.println("<div id=\"sub_banner\">");
out.println("<div id=\"search\">");
out.println(" <form method=\"get\" action=\"http://www.google.com/u/thdl\">");
out.println(" <p>");
out.println(" <input type=\"text\" name=\"q\" id=\"q\" size=\"15\" maxlength=\"255\" value=\"\" />");
out.println(" <input type=\"submit\" name=\"sa\" id=\"sa\" value=\"Search\"/>");
out.println(" <input type=\"hidden\" name=\"hq\" id=\"hq\" value=\"inurl:orion.lib.virginia.edu\"/>");
out.println(" </p>");
out.println(" </form>");
out.println(" </div>");
out.println(" <div id=\"breadcrumbs\">");
out.println(" <a href=\"/thdl/index.html\">Home</a> &gt; <a href=\"/thdl/reference/index.html\">Reference</a> &gt; Translation Tool");
out.println(" </div>");
out.println("</div><!--END sub_banner-->");
out.println("<div id=\"main\">");
}
out.println("<h3 align=\"center\">The Online Tibetan to English Translation/Dictionary Tool</h3>");
try
{
out.println(rb.getString(otherLinksProperty));
}
catch (MissingResourceException e)
{
// do nothing
}
if (useTHDLBanner)
{
out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter?thdlBanner=on\" method=POST>");
}
else
{
out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter\" method=POST>");
}
out.println("<table border=\"0\" width=\"100%\">");
out.println(" <tr>");
out.println(" <td width=\"25%\">");
out.println(" <p>Display results in:</td>");
out.println(" <td width=\"75%\">");
out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" ");
if (wantsTibetan) out.println("checked ");
out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://orion.lib.virginia.edu/thdl/tools/tmw.html\" target=\"_blank\">Tibetan Machine Web font</a>)<br/>");
out.println(" <input type=\"radio\" value=\"roman\" ");
if (!wantsTibetan) out.println("checked ");
out.println("name=\"" + scriptStr + "\">Roman script</td>");
out.println(" </tr>");
out.println("</table>");
out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter?thdlBanner=on\" method=POST>");
}
else
{
out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter\" method=POST>");
}
out.println("<table border=\"0\" width=\"100%\">");
out.println(" <tr>");
out.println(" <td width=\"25%\">");
out.println(" <p>Display results in:</td>");
out.println(" <td width=\"75%\">");
out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" ");
if (wantsTibetan) out.println("checked ");
out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://www.thdl.org/xml/show.php?xml=/tools/tibfonts.xml&l=uva10928423419921\" target=\"_blank\">Tibetan Machine Uni font</a>)<br/>");
out.println(" <input type=\"radio\" value=\"roman\" ");
if (!wantsTibetan) out.println("checked ");
out.println("name=\"" + scriptStr + "\">Roman script</td>");
out.println(" </tr>");
out.println("</table>");
if (dictionaries!=null)
{
int i;
ds = scanner.getDictionarySource();
ds.reset();
checkedDicts = new boolean[dictionaries.length];
/* out.println(" <tr>");
out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/
/* out.println(" <tr>");
out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/
out.println("<p>Search in dictionaries: ");
allUnchecked=true;
for (i=0; i<dictionaries.length; i++)
@ -244,50 +244,52 @@ public class OnLineScannerFilter extends HttpServlet
else ds = BitDictionarySource.getAllDictionaries();
// out.println("</table>");
out.println("</p>");
out.println("<table border=\"0\" width=\"100%\">");
out.println(" <tr>");
out.println(" <td width=\"35%\">");
out.println("<table border=\"0\" width=\"100%\">");
out.println(" <tr>");
out.println(" <td width=\"35%\">");
out.println(" <p><strong>Input text:</strong></p>");
out.println(" </td>");
out.println(" <td width=\"65%\">");
out.println(" <p> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></p>");
out.println(" </td>");
out.println(" </tr>");
out.println("</table>");
out.println(" </td>");
out.println(" <td width=\"65%\">");
out.println(" <p> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></p>");
out.println(" </td>");
out.println(" </tr>");
out.println("</table>");
out.println("<textarea rows=\"12\" name=\"parrafo\" cols=\"60\">");
// Paragraph should be empty if the user just clicked the clear button
out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
if (wantsTibetan) out.print(" class=\"tib\"");
out.println(">");
// Paragraph should be empty if the user just clicked the clear button
answer = request.getParameter(buttonStr);
if (answer == null || answer != null && !answer.equals(clearStr))
{
parrafo = request.getParameter("parrafo");
if (parrafo!=null) out.print(parrafo);
}
out.println("</textarea>");
out.println("</form>");
if (answer == null || answer != null && !answer.equals(clearStr))
{
parrafo = request.getParameter("parrafo");
if (parrafo!=null) out.print(parrafo);
}
if (parrafo != null)
{
sl.writeLog("Translation\tOnLineScannerFilter");
if (ds!=null && !ds.isEmpty())
desglosar(parrafo, out, wantsTibetan);
}
else sl.writeLog("Invocation\tOnLineScannerFilter");
out.println("</textarea>");
out.println("</form>");
if (parrafo != null)
{
sl.writeLog("Translation\tOnLineScannerFilter");
if (ds!=null && !ds.isEmpty())
desglosar(parrafo, out, wantsTibetan);
}
else sl.writeLog("Invocation\tOnLineScannerFilter");
out.println(TibetanScanner.copyrightHTML);
if (useTHDLBanner) out.println("</div><!--END main-->");
out.println("</body>");
out.println("</html>");
}
out.println("</body>");
out.println("</html>");
}
/** Handles POST requests by delegating to doGet. The translation form
    emitted by doGet submits with method=POST, and doGet performs all of the
    parameter parsing and page rendering, so both HTTP methods share one
    code path.

    @param request  the incoming servlet request
    @param response the response the HTML page is written to
*/
public void doPost(HttpServletRequest request,
                   HttpServletResponse response)
//throws IOException, ServletException
{
    doGet(request, response);
}
synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan)
{
@ -298,22 +300,25 @@ public class OnLineScannerFilter extends HttpServlet
if (!in.equals(""))
{
/* while (hayMasLineas)
{
fin = in.indexOf("\n",init);
if (fin<0)
{
linea = in.substring(init).trim();
hayMasLineas=false;
}
else
linea = in.substring(init, fin).trim();
scanner.scanBody(linea);
init = fin+1;
} */
/* while (hayMasLineas)
{
fin = in.indexOf("\n",init);
if (fin<0)
{
linea = in.substring(init).trim();
hayMasLineas=false;
}
else
linea = in.substring(init, fin).trim();
scanner.scanBody(linea);
init = fin+1;
} */
scanner.clearTokens();
in = Manipulate.NCR2UnicodeString(in);
if (Manipulate.guessIfUnicode(in)) in = Manipulate.unicodeToWylie(in);
else if (Manipulate.guessIfAcip(in)) in = Manipulate.acipToWylie(in);
scanner.scanBody(in);
scanner.finishUp();
printText(pw, tibetan);
@ -335,35 +340,35 @@ public class OnLineScannerFilter extends HttpServlet
for (i=0; i < words.length; i++)
{
if (words[i] instanceof Word)
{
word = new SwingWord((Word)words[i]);
// if (word.getDefs().getDictionarySource()!=null)
pw.print(word.getLink());
// else pw.print(word.getWylie() + " ");
}
else
{
if (words[i] instanceof PunctuationMark)
{
pm = words[i].toString().charAt(0);
switch (pm)
{
case '\n':
pw.println("</p>");
pw.print("<p>");
break;
case '<':
pw.print("&lt; ");
break;
case '>':
pw.print("&gt; ");
break;
default:
pw.print(pm + " ");
}
}
}
if (words[i] instanceof Word)
{
word = new SwingWord((Word)words[i]);
// if (word.getDefs().getDictionarySource()!=null)
pw.print(word.getLink(tibetan));
// else pw.print(word.getWylie() + " ");
}
else
{
if (words[i] instanceof PunctuationMark)
{
pm = words[i].toString().charAt(0);
switch (pm)
{
case '\n':
pw.println("</p>");
pw.print("<p>");
break;
case '<':
pw.print("&lt; ");
break;
case '>':
pw.print("&gt; ");
break;
default:
pw.print(pm + " ");
}
}
}
}
pw.println("</p>");
}
@ -376,17 +381,17 @@ public class OnLineScannerFilter extends HttpServlet
String tag;
DictionarySource ds;
ByteDictionarySource sourceb=null;
words = scanner.getWordArray(false);
if (words == null)
return;
pw.println("<table border=\"1\" width=\"100%\">");
for (j = 0; j < words.length; j++) {
try {
word = new SwingWord(words[j]);
defs = word.getDefs();
ds = defs.getDictionarySource();
@ -400,19 +405,20 @@ public class OnLineScannerFilter extends HttpServlet
}
else {
sourceb = (ByteDictionarySource) ds;
k=0;
while (sourceb.isEmpty(k)) k++;
tag = sourceb.getTag(k);
k++;
k=0;
while (sourceb.isEmpty(k)) k++;
tag = sourceb.getTag(k);
k++;
}
}
pw.println(" <td width=\"20%\" rowspan=\"" + defs.def.length
+ "\" valign=\"top\">" + word.getBookmark(tibetan)
+ "</td>");
pw.print(" <td width=\"20%\" rowspan=\"" + defs.def.length
+ "\" valign=\"top\"");
if (tibetan) pw.print(" class=\"tib\"");
pw.println(">" + word.getBookmark(tibetan) + "</td>");
pw.println(" <td width=\"12%\">" + tag + "</td>");
pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>");
pw.println(" </tr>");
for (i = 1; i < defs.def.length; i++) {
pw.println(" <tr>");
@ -421,9 +427,9 @@ public class OnLineScannerFilter extends HttpServlet
tag = ds.getTag(i);
}
else {
while (sourceb.isEmpty(k)) k++;
tag = sourceb.getTag(k);
k++;
while (sourceb.isEmpty(k)) k++;
tag = sourceb.getTag(k);
k++;
}
pw.println(" <td width=\"12%\">" + tag + "</td>");
@ -435,17 +441,17 @@ public class OnLineScannerFilter extends HttpServlet
sl.writeLog("Crash\tOnLineScannerFilter\t" + word.getWylie());
sl.writeException(e);
}
}
pw.println("</table>");
}
/** Shuts the servlet down: runs the superclass teardown, writes a
    "Shutdown" entry to the scanner log and then destroys the scanner.
    Each step runs exactly once.
*/
public void destroy()
{
    super.destroy();

    // Reset the logger's user IP before writing the shutdown entry.
    sl.setUserIP(null);
    sl.writeLog("Shutdown\tOnLineScannerFilter");

    // The constructor logs and carries on when the scanner cannot be
    // created, so the field may still be null at this point.
    if (scanner != null)
        scanner.destroy();
}
}

View file

@ -1,20 +1,20 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
@ -29,127 +29,136 @@ import javax.swing.text.BadLocationException;
import org.thdl.tib.input.DuffPane;
import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.reverter.*;
import org.thdl.util.RTFFixerInputStream;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
/** Identical to DuffPane except that it only supports Tibetan script in
TibetanMachineWeb. No roman script can be inputted. If roman script is
pasted, it is assumed that it is either ACIP or wylie and is converted
accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted,
it is converted to TibetanMachineWeb. Any other font is assumed to be
Roman script.
*/
TibetanMachineWeb. No roman script can be inputted. If roman script is
pasted, it is assumed that it is either ACIP or wylie and is converted
accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted,
it is converted to TibetanMachineWeb. Any other font is assumed to be
Roman script.
*/
public class StrictDuffPane extends DuffPane
{
public StrictDuffPane()
{
super();
disableRoman();
}
/** Smart paste! Automatically recognizes what is being pasted and converts
respectively. Currently it supports pasting from TibetanMachineWeb,
TibetanMachine, wylie, and ACIP.
*/
public void paste(int offset)
{
// Respect setEditable(boolean):
if (!this.isEditable())
return;
try
{
Transferable contents = rtfBoard.getContents(this);
if (contents.isDataFlavorSupported(rtfFlavor)){
InputStream in = (InputStream)contents.getTransferData(rtfFlavor);
int p1 = offset;
//construct new document that contains only portion of text you want to paste
TibetanDocument sd = new TibetanDocument();
// I swear this happened once when I pasted in some
// random junk just after Jskad started up.
ThdlDebug.verify(null != in);
boolean errorReading = false;
try
{
if (!ThdlOptions.getBooleanOption("thdl.do.not.fix.rtf.hex.escapes"))
in = new RTFFixerInputStream(in);
rtfEd.read(in, sd, 0);
} catch (Exception e) {
errorReading = true;
/* If fonts weren't supported and we don't know what it is try to paste
ACIP or wylie.
*/
if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
toTibetanMachineWeb(data, offset);
}
// JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop.");
}
if (!errorReading)
{
/* If it is any font beside TibetanMachine and TibetanMachineWeb
assume it is wylie or Acip.
*/
if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine")
&& contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
toTibetanMachineWeb(data, offset);
}
else
{
// If it's font is TibetanMachine, convert to TibetanMachineWeb first
if (sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().equals("TibetanMachine"))
{
StringBuffer errors = new StringBuffer();
long numAttemptedReplacements[] = new long[] { 0 };
sd.convertToTMW(0, -1, errors, numAttemptedReplacements);
}
for (int i=0; i<sd.getLength()-1; i++) { //getLength()-1 so that final newline is not included in paste
try
{
String s = sd.getText(i,1);
AttributeSet as = sd.getCharacterElement(i).getAttributes();
getTibDoc().insertString(p1+i, s, as);
} catch (BadLocationException ble)
{
ble.printStackTrace();
ThdlDebug.noteIffyCode();
}
}
}
}
}
else if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{
// if it is not in a font, assume it is wylie or ACIP.
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
toTibetanMachineWeb(data, offset);
}
} catch (UnsupportedFlavorException ufe) {
ufe.printStackTrace();
ThdlDebug.noteIffyCode();
} catch (IOException ioe) {
ioe.printStackTrace();
ThdlDebug.noteIffyCode();
} catch (IllegalStateException ise) {
ise.printStackTrace();
ThdlDebug.noteIffyCode();
}
}
public StrictDuffPane()
{
super();
disableRoman();
}
/** Smart paste! Automatically recognizes what is being pasted and converts
respectively. Currently it supports pasting from TibetanMachineWeb,
TibetanMachine, wylie, and ACIP.
*/
public void paste(int offset)
{
boolean pasteAsString = false;
// Respect setEditable(boolean):
if (!this.isEditable())
return;
try
{
Transferable contents = rtfBoard.getContents(this);
if (contents.isDataFlavorSupported(rtfFlavor)){
InputStream in = (InputStream)contents.getTransferData(rtfFlavor);
int p1 = offset;
//construct new document that contains only portion of text you want to paste
TibetanDocument sd = new TibetanDocument();
// I swear this happened once when I pasted in some
// random junk just after Jskad started up.
ThdlDebug.verify(null != in);
boolean errorReading = false;
try
{
if (!ThdlOptions.getBooleanOption("thdl.do.not.fix.rtf.hex.escapes"))
in = new RTFFixerInputStream(in);
rtfEd.read(in, sd, 0);
} catch (Exception e) {
errorReading = true;
/* If fonts weren't supported and we don't know what it is try to paste
ACIP or wylie.
*/
if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{
pasteAsString = true;
}
// JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop.");
}
if (!errorReading)
{
/* If it is any font beside TibetanMachine and TibetanMachineWeb
assume it is wylie or Acip.
*/
if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine")
&& contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{
pasteAsString = true;
}
else
{
// If it's font is TibetanMachine, convert to TibetanMachineWeb first
if (sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().equals("TibetanMachine"))
{
StringBuffer errors = new StringBuffer();
long numAttemptedReplacements[] = new long[] { 0 };
sd.convertToTMW(0, -1, errors, numAttemptedReplacements);
}
for (int i=0; i<sd.getLength()-1; i++) { //getLength()-1 so that final newline is not included in paste
try
{
String s = sd.getText(i,1);
AttributeSet as = sd.getCharacterElement(i).getAttributes();
getTibDoc().insertString(p1+i, s, as);
} catch (BadLocationException ble)
{
ble.printStackTrace();
ThdlDebug.noteIffyCode();
}
}
}
}
}
else if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{
// if it is not in a font, assume it is wylie or ACIP.
pasteAsString = true;
}
if (pasteAsString)
{
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
if (Manipulate.guessIfUnicode(data))
{
StringBuffer errors = new StringBuffer();
data = Converter.convertToEwtsForComputers(data, errors);
} else if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
toTibetanMachineWeb(data, offset);
}
} catch (UnsupportedFlavorException ufe) {
ufe.printStackTrace();
ThdlDebug.noteIffyCode();
} catch (IOException ioe) {
ioe.printStackTrace();
ThdlDebug.noteIffyCode();
} catch (IllegalStateException ise) {
ise.printStackTrace();
ThdlDebug.noteIffyCode();
}
}
}

View file

@ -20,7 +20,8 @@ Contributor(s): ______________________________________.
to store the dictionary. */
package org.thdl.tib.scanner;
import org.thdl.tib.text.TibetanHTML;
//import org.thdl.tib.text.TibetanHTML;
import org.thdl.tib.text.ttt.*;
/** Tibetan word with its corresponding definitions.
@ -60,7 +61,8 @@ public class SwingWord extends Word
{
try
{
localWord = TibetanHTML.getHTML(super.token + " ");
// localWord = TibetanHTML.getHTML(super.token + " ");
localWord = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(super.token + " "));
}
catch (Exception e)
{
@ -82,6 +84,7 @@ public class SwingWord extends Word
public String getLink(boolean tibetan)
{
String localWord, result=null;
String className = "";
if (wordSinDec==null) localWord = super.token;
else localWord = wordSinDec;
@ -89,7 +92,8 @@ public class SwingWord extends Word
{
try
{
result = TibetanHTML.getHTML(localWord + " ");
result = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(localWord + " "));
className = " class = \"tib\"";
}
catch (Exception e)
{
@ -101,6 +105,6 @@ public class SwingWord extends Word
if (tibetan) result+= "</a>";
else result+= "</a> ";
return result;*/
return "<a href=\"#" + super.token + "\">" + result + "</a> ";
return "<a href=\"#" + super.token + "\"" + className + ">" + result + "</a> ";
}
}

View file

@ -27,7 +27,7 @@ import org.thdl.util.ThdlVersion;
*/
public abstract class TibetanScanner
{
public static final String version = "The Tibetan to English Translation Tool, version 3.2.1 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2005 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";