Fixed translation tool servlet issues: got rid of title, deleted white space, dealt with UTF8 better, etc.

This commit is contained in:
amontano 2009-03-03 05:23:49 +00:00
parent 835e74c0cd
commit 5a0e454a2e
6 changed files with 1574 additions and 1434 deletions

View File

@ -46,6 +46,7 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
private static final int WYLIE_TO_ACIP=2; private static final int WYLIE_TO_ACIP=2;
private static final int UNICODE_TO_WYLIE=3; private static final int UNICODE_TO_WYLIE=3;
private static final int WYLIE_TO_UNICODE=4; private static final int WYLIE_TO_UNICODE=4;
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
/** Converts from the Acip transliteration scheme to EWTS.*/ /** Converts from the Acip transliteration scheme to EWTS.*/
public static String acipToWylie(String acip) public static String acipToWylie(String acip)
@ -252,7 +253,19 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
nuevaPalabra = Manipulate.fixWazur(nuevaPalabra); nuevaPalabra = Manipulate.fixWazur(nuevaPalabra);
return nuevaPalabra;*/ return nuevaPalabra;*/
} }
/** Finds the first position at or after <code>pos</code> whose code point
    lies inside TIBETAN_UNICODE_RANGE (bounds inclusive).
    @param unicode text to search
    @param pos index at which the search begins
    @return index of the first code point inside the range, or -1 if none is found */
private static int getTibetanUnicodeStart(String unicode, int pos)
{
    int index = pos;
    while (index < unicode.length())
    {
        // Read the code point once and compare it against both inclusive bounds.
        final int codePoint = unicode.codePointAt(index);
        final boolean insideRange = codePoint >= TIBETAN_UNICODE_RANGE[0]
            && codePoint <= TIBETAN_UNICODE_RANGE[1];
        if (insideRange)
        {
            return index;
        }
        index++;
    }
    return -1;
}
/** Finds the first position at or after <code>pos</code> whose code point
    lies outside TIBETAN_UNICODE_RANGE (bounds inclusive).
    @param unicode text to search
    @param pos index at which the search begins
    @return index of the first code point outside the range; if every
    remaining position is inside the range, the position at which the
    scan stopped */
private static int getTibetanUnicodeEnd(String unicode, int pos)
{
    int index = pos;
    while (index < unicode.length())
    {
        final int codePoint = unicode.codePointAt(index);
        if (codePoint < TIBETAN_UNICODE_RANGE[0] || codePoint > TIBETAN_UNICODE_RANGE[1])
        {
            break;
        }
        index++;
    }
    return index;
}
/** Converts Tibetan Unicode to EWTS. */ /** Converts Tibetan Unicode to EWTS. */
public static String unicodeToWylie(String unicode) public static String unicodeToWylie(String unicode)
{ {
@ -261,9 +274,9 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
TibetanDocument tibDoc; TibetanDocument tibDoc;
StringBuffer errors; StringBuffer errors;
int posStart=0, posEnd; int posStart=0, posEnd;
while((posStart = Manipulate.getTibetanUnicodeStart(unicode, posStart))>=0) while((posStart = getTibetanUnicodeStart(unicode, posStart))>=0)
{ {
posEnd = Manipulate.getTibetanUnicodeEnd(unicode, posStart+1); posEnd = getTibetanUnicodeEnd(unicode, posStart+1);
startString = unicode.substring(0, posStart); startString = unicode.substring(0, posStart);
tibetanString = unicode.substring(posStart, posEnd); tibetanString = unicode.substring(posStart, posEnd);
endString = unicode.substring(posEnd); endString = unicode.substring(posEnd);

View File

@ -1,463 +1,500 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
/** Miscellaneous static methods for the manipulation of Tibetan text. /** Miscellaneous static methods for the manipulation of Tibetan text.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
*/ */
public class Manipulate public class Manipulate
{ {
private static String endOfParagraphMarks = "/;|!:^@#$%="; private static String endOfParagraphMarks = "/;|!:^@#$%=,";
private static String bracketMarks = "<>(){}[]"; private static String bracketMarks = "<>(){}[]";
private static String endOfSyllableMarks = " _\t"; private static String endOfSyllableMarks = " _\t";
private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks; private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks;
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095}; private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
private static String JSON_ESCAPABLES = "\"\\/";
/* public static String[] parseFields (String s, char delimiter)
{ /* public static String[] parseFields (String s, char delimiter)
int pos; {
String field; int pos;
SimplifiedLinkedList ll = new SimplifiedLinkedList(); String field;
SimplifiedLinkedList ll = new SimplifiedLinkedList();
while ((pos = s.indexOf(delimiter))>=0)
{ while ((pos = s.indexOf(delimiter))>=0)
field = s.substring(0, pos).trim(); {
ll.addLast(field); field = s.substring(0, pos).trim();
s = s.substring(pos+1); ll.addLast(field);
} s = s.substring(pos+1);
}
ll.addLast(s.trim());
return ll.toStringArray(); ll.addLast(s.trim());
}*/ return ll.toStringArray();
}*/
public static int indexOfAnyChar(String str, String chars)
{ public static int indexOfAnyChar(String str, String chars)
int i; {
for (i=0; i<str.length(); i++) int i;
{ for (i=0; i<str.length(); i++)
if (chars.indexOf(str.charAt(i))>=0) {
return i; if (chars.indexOf(str.charAt(i))>=0)
} return i;
}
return -1;
} return -1;
}
public static int indexOfExtendedEndOfSyllableMark(String word)
{ public static int indexOfExtendedEndOfSyllableMark(String word)
return indexOfAnyChar(word, allStopMarkers); {
} return indexOfAnyChar(word, allStopMarkers);
}
public static int indexOfBracketMarks(String word)
{ public static int indexOfBracketMarks(String word)
return indexOfAnyChar(word, bracketMarks); {
} return indexOfAnyChar(word, bracketMarks);
}
public static boolean isPunctuationMark(int ch)
{ public static boolean isPunctuationMark(int ch)
return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0; {
} return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0;
}
public static boolean isEndOfParagraphMark(int ch)
{ public static boolean isEndOfParagraphMark(int ch)
return endOfParagraphMarks.indexOf(ch)>=0; {
} return endOfParagraphMarks.indexOf(ch)>=0;
}
public static boolean isEndOfSyllableMark(int ch)
{ public static boolean isEndOfSyllableMark(int ch)
return endOfSyllableMarks.indexOf(ch)>=0; {
} return endOfSyllableMarks.indexOf(ch)>=0;
}
public static boolean isMeaningful(String s)
{ public static boolean isMeaningful(String s)
for (int i=0; i<s.length(); i++) {
if (Character.isLetterOrDigit(s.charAt(i))) return true; for (int i=0; i<s.length(); i++)
if (Character.isLetterOrDigit(s.charAt(i))) return true;
return false;
} return false;
}
public static String replace(String linea, String origSub, String newSub)
{ public static String replace(String linea, String origSub, String newSub)
int pos, lenOrig = origSub.length(); {
while ((pos = linea.indexOf(origSub))!=-1) int pos, lenOrig = origSub.length();
{ while ((pos = linea.indexOf(origSub))!=-1)
linea = linea.substring(0, pos).concat(newSub).concat(linea.substring(pos+lenOrig)); {
} linea = linea.substring(0, pos).concat(newSub).concat(linea.substring(pos+lenOrig));
return linea; }
} return linea;
}
public static String deleteSubstring (String string, int pos, int posEnd)
{ public static String deleteSubstring (String string, int pos, int posEnd)
if (pos<0) return string; {
if (pos<0) return string;
if (pos==0)
{ if (pos==0)
return string.substring(posEnd).trim(); {
} return string.substring(posEnd).trim();
else }
{ else
if (posEnd<string.length()) {
return string.substring(0, pos).concat(string.substring(posEnd)).trim(); if (posEnd<string.length())
else return string.substring(0, pos).concat(string.substring(posEnd)).trim();
return string.substring(0, pos).trim(); else
} return string.substring(0, pos).trim();
} }
}
public static String replace(String string, int pos, int posEnd, String newSub)
{ public static String replace(String string, int pos, int posEnd, String newSub)
if (pos<0) return string; {
if (pos<0) return string;
if (pos==0)
{ if (pos==0)
return newSub.concat(string.substring(posEnd)).trim(); {
} return newSub.concat(string.substring(posEnd)).trim();
else }
{ else
if (posEnd<string.length()) {
return string.substring(0, pos).concat(newSub).concat(string.substring(posEnd)).trim(); if (posEnd<string.length())
else return string.substring(0, pos).concat(newSub).concat(string.substring(posEnd)).trim();
return string.substring(0, pos).concat(newSub).trim(); else
} return string.substring(0, pos).concat(newSub).trim();
} }
}
public static String deleteSubstring (String string, String sub)
{ public static String deleteSubstring (String string, String sub)
int pos = string.indexOf(sub), posEnd = pos + sub.length(); {
return deleteSubstring(string, pos, posEnd); int pos = string.indexOf(sub), posEnd = pos + sub.length();
} return deleteSubstring(string, pos, posEnd);
}
public static String[] addString(String array[], String s, int n)
{ public static String[] addString(String array[], String s, int n)
int i; {
String newArray[] = new String[array.length+1]; int i;
String newArray[] = new String[array.length+1];
for (i=0; i<n; i++)
newArray[i] = array[i]; for (i=0; i<n; i++)
newArray[i] = array[i];
newArray[n] = s;
newArray[n] = s;
for (i=n+1; i<newArray.length; i++)
newArray[i] = array[i-1]; for (i=n+1; i<newArray.length; i++)
newArray[i] = array[i-1];
return newArray;
} return newArray;
}
public static String[] deleteString(String array[], int n)
{ public static String[] deleteString(String array[], int n)
int i; {
int i;
String newArray[] = new String[array.length-1];
String newArray[] = new String[array.length-1];
for (i=0; i<n; i++)
newArray[i] = array[i]; for (i=0; i<n; i++)
newArray[i] = array[i];
for (i=n; i<newArray.length; i++)
newArray[i] = array[i+1]; for (i=n; i<newArray.length; i++)
newArray[i] = array[i+1];
return newArray;
} return newArray;
}
public static boolean isVowel (char ch)
{ public static boolean isVowel (char ch)
ch = Character.toLowerCase(ch); {
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; ch = Character.toLowerCase(ch);
} return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
}
/** If more than half of the first letters among the first are 10 characters
are uppercase assume its acip */ /** If more than half of the first letters among the first are 10 characters
public static boolean guessIfAcip(String line) are uppercase assume its acip */
{ public static boolean guessIfAcip(String line)
char ch; {
int letters=0, upperCase=0, i, n; char ch;
n = line.length(); int letters=0, upperCase=0, i, n;
if (n>10) n = 10; n = line.length();
for (i=0; i<n; i++) if (n>10) n = 10;
{ for (i=0; i<n; i++)
ch = line.charAt(i); {
if (Character.isLetter(ch)) ch = line.charAt(i);
{ if (Character.isLetter(ch))
letters++; {
if (Character.isUpperCase(ch)) upperCase++; letters++;
} if (Character.isUpperCase(ch)) upperCase++;
} }
if (letters==0 || upperCase==0) return false; }
else return (letters / upperCase < 2); if (letters==0 || upperCase==0) return false;
} else return (letters / upperCase < 2);
}
public static boolean isTibetanUnicodeCharacter(char ch)
{ public static boolean isTibetanUnicodeCharacter(char ch)
return ch>=0xF00 && ch<=0xFFF; {
} return ch>=0xF00 && ch<=0xFFF;
}
public static boolean isTibetanUnicodeLetter(char ch)
{ public static boolean isTibetanUnicodeLetter(char ch)
{
return ch>=0xF40 && ch<=0xFBC || ch>=0xF00 && ch<=0xF03;
} return ch>=0xF40 && ch<=0xFBC || ch>=0xF00 && ch<=0xF03;
}
public static boolean isTibetanUnicodeDigit(char ch)
{ public static boolean isTibetanUnicodeDigit(char ch)
{
return ch>=0xF20 && ch<=0xF33;
} return ch>=0xF20 && ch<=0xF33;
}
public static boolean guessIfUnicode(String line)
{
char ch; public static boolean guessIfUnicode(String line)
int unicode=0, i, n; {
n = line.length(); char ch;
if (n>10) n = 10; int unicode=0, i, n;
for (i=0; i<n; i++) n = line.length();
{ if (n>10) n = 10;
ch = line.charAt(i); for (i=0; i<n; i++)
if (isTibetanUnicodeCharacter(ch)) unicode++; {
} ch = line.charAt(i);
if (n==0 || unicode==0) return false; if (isTibetanUnicodeCharacter(ch)) unicode++;
else return (n / unicode < 2); }
} if (n==0 || unicode==0) return false;
else return (n / unicode < 2);
public static String fixWazur(String linea) }
{
int i; public static String fixWazur(String linea)
{
for (i=1; i<linea.length(); i++) int i;
{
if (linea.charAt(i)=='W') for (i=1; i<linea.length(); i++)
{ {
if (Character.isLetter(linea.charAt(i-1))) if (linea.charAt(i)=='W')
linea = linea.substring(0,i) + 'V' + linea.substring(i+1); {
} if (Character.isLetter(linea.charAt(i-1)))
} linea = linea.substring(0,i) + 'V' + linea.substring(i+1);
return linea; }
} }
return linea;
/** Returns the base letter of a syllable. Does not include the vowel! }
Ignoring cases for now. */
public static String getBaseLetter (String sil) /** Returns the base letter of a syllable. Does not include the vowel!
{ Ignoring cases for now. */
sil = sil.toLowerCase(); public static String getBaseLetter (String sil)
{
int i=0; sil = sil.toLowerCase();
char ch, ch2;
int i=0;
while (!isVowel(sil.charAt(i))) i++; char ch, ch2;
if (i==0) return "";
while (!isVowel(sil.charAt(i)))
i--; {
if (i==-1) return ""; i++;
if (i>=sil.length()) return null;
if (sil.charAt(i)=='-') i--; }
if (i==0) return "";
ch = sil.charAt(i);
i--;
// check to see if it is a subscript (y, r, l, w) if (i==-1) return "";
if (i>0)
{ if (sil.charAt(i)=='-') i--;
switch (ch) if (i>0 && sil.charAt(i)=='w') i--;
{ ch = sil.charAt(i);
case 'r': case 'l': case 'w': i--;
break; // check to see if it is a subscript (y, r, l, w)
case 'y': if (i>0)
ch2 = sil.charAt(i-1); {
switch (ch2) switch (ch)
{ {
case '.': return "y"; case 'r': case 'l': i--;
case 'n': return "ny"; break;
default: i--; case 'y':
} ch2 = sil.charAt(i-1);
} switch (ch2)
} {
if (i==0) return sil.substring(i,i+1); case '.': return "y";
ch = sil.charAt(i); case 'n': return "ny";
ch2 = sil.charAt(i-1); default: i--;
}
switch(ch) }
{ }
case 'h': if (sil.charAt(i)=='+') i--;
switch (ch2) if (i==0) return sil.substring(i,i+1);
{ ch = sil.charAt(i);
case 'k': case 'c': case 't': case 'p': case 'z': ch2 = sil.charAt(i-1);
return sil.substring(i-1,i+1);
case 's': switch(ch)
if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh"; {
else return "sh"; case 'h':
default: return "h"; switch (ch2)
} {
case 's': case 'k': case 'c': case 't': case 'p': case 'z':
if (ch2=='t') return "ts"; return sil.substring(i-1,i+1);
else return "s"; case '+':
case 'g': return sil.substring(i-2, i-1);
if (ch2=='n') return "ng"; case 's':
else return "g"; if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh";
case 'z': else return "sh";
if (ch2=='d') return "dz"; default: return "h";
else return "z"; }
} case 's':
return sil.substring(i,i+1); if (ch2=='t') return "ts";
} else return "s";
case 'g':
public static String deleteQuotes(String s) if (ch2=='n') return "ng";
{ else return "g";
int length = s.length(), pos; case 'z':
if (length>2) if (ch2=='d') return "dz";
{ else return "z";
if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"')) }
s = s.substring(1,length-1); return sil.substring(i,i+1);
}
do
{ public static String deleteQuotes(String s)
pos = s.indexOf("\"\""); {
if (pos<0) break; int length = s.length(), pos;
s = Manipulate.deleteSubstring(s, pos, pos+1); if (length>2)
} while (true); {
} if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"'))
s = s.substring(1,length-1);
return s;
} do
{
pos = s.indexOf("\"\"");
if (pos<0) break;
/** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries s = Manipulate.deleteSubstring(s, pos, pos+1);
} while (true);
Takes the output of ConsoleScannerFilter }
(in RY format), converts the Wylie to Acip
and displays the result in csv format. return s;
arch-palabras es usado solo cuando deseamos las palabras cambiadas }
a otro archivo.
public static void main (String[] args) throws Exception /** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries
{
String linea, palabra, definicion, nuevaPalabra; Takes the output of ConsoleScannerFilter
int marker; (in RY format), converts the Wylie to Acip
PrintWriter psPalabras = null; and displays the result in csv format.
arch-palabras es usado solo cuando deseamos las palabras cambiadas
BufferedReader keyb = new BufferedReader(new InputStreamReader(System.in)); a otro archivo.
if (args.length==1)
psPalabras = new PrintWriter(new FileOutputStream(args[0])); public static void main (String[] args) throws Exception
{
while ((linea=keyb.readLine())!=null) String linea, palabra, definicion, nuevaPalabra;
{ int marker;
if (linea.trim().equals("")) continue; PrintWriter psPalabras = null;
marker = linea.indexOf('-');
if (marker<0) // linea tiene error BufferedReader keyb = new BufferedReader(new InputStreamReader(System.in));
{
palabra = linea; if (args.length==1)
definicion = ""; psPalabras = new PrintWriter(new FileOutputStream(args[0]));
}
else while ((linea=keyb.readLine())!=null)
{ {
palabra = linea.substring(0, marker).trim(); if (linea.trim().equals("")) continue;
definicion = linea.substring(marker+1).trim(); marker = linea.indexOf('-');
} if (marker<0) // linea tiene error
{
nuevaPalabra = wylieToAcip(palabra); palabra = linea;
definicion = "";
if (psPalabras!=null) }
psPalabras.println(nuevaPalabra); else
else System.out.print(nuevaPalabra + '\t'); {
if (definicion.equals("")) palabra = linea.substring(0, marker).trim();
System.out.println(palabra); definicion = linea.substring(marker+1).trim();
else }
System.out.println(palabra + '\t' + definicion);
} nuevaPalabra = wylieToAcip(palabra);
if (psPalabras!=null) psPalabras.flush();
}*/ if (psPalabras!=null)
psPalabras.println(nuevaPalabra);
/** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */ else System.out.print(nuevaPalabra + '\t');
public static String NCR2UnicodeString(String str) if (definicion.equals(""))
{ System.out.println(palabra);
StringBuffer ostr = new StringBuffer(); else
int i1=0; System.out.println(palabra + '\t' + definicion);
int i2=0; }
if (psPalabras!=null) psPalabras.flush();
while(i2<str.length()) }*/
{
i1 = str.indexOf("&#",i2); /** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */
if (i1 == -1 ) { public static String NCR2UnicodeString(String str)
ostr.append(str.substring(i2, str.length())); {
break ; StringBuffer ostr = new StringBuffer();
} int i1=0;
ostr.append(str.substring(i2, i1)); int i2=0;
i2 = str.indexOf(";", i1);
if (i2 == -1 ) { while(i2<str.length())
ostr.append(str.substring(i1, str.length())); {
break ; i1 = str.indexOf("&#",i2);
} if (i1 == -1 ) {
ostr.append(str.substring(i2, str.length()));
String tok = str.substring(i1+2, i2); break ;
try { }
int radix = 10 ; ostr.append(str.substring(i2, i1));
if (tok.trim().charAt(0) == 'x') { i2 = str.indexOf(";", i1);
radix = 16 ; if (i2 == -1 ) {
tok = tok.substring(1,tok.length()); ostr.append(str.substring(i1, str.length()));
} break ;
ostr.append((char) Integer.parseInt(tok, radix)); }
} catch (NumberFormatException exp) {
ostr.append('?') ; String tok = str.substring(i1+2, i2);
} try {
i2++ ; int radix = 10 ;
} if (tok.trim().charAt(0) == 'x') {
return new String(ostr) ; radix = 16 ;
} tok = tok.substring(1,tok.length());
}
public static String UnicodeString2NCR(String str) ostr.append((char) Integer.parseInt(tok, radix));
{ } catch (NumberFormatException exp) {
StringBuffer ncr = new StringBuffer(); ostr.append('?') ;
int i; }
for (i=0; i<str.length(); i++) i2++ ;
{ }
ncr.append("&#" + Integer.toString(str.charAt(i)) + ";"); return new String(ostr) ;
} }
return ncr.toString();
} public static String UnicodeString2NCR(String str)
{
public static String unescape(String s) { StringBuffer ncr = new StringBuffer();
int i=0,len=s.length(); int i;
char c; for (i=0; i<str.length(); i++)
StringBuffer sb = new StringBuffer(len); {
while (i<len) { ncr.append("&#" + Integer.toString(str.charAt(i)) + ";");
c = s.charAt(i++); }
if (c=='\\') { return ncr.toString();
if (i<len) { }
c = s.charAt(i++);
if (c=='u') { public static String toJSON(String str)
c = (char) Integer.parseInt(s.substring(i,i+4),16); {
i += 4; int pos, i, len;
} // add other cases here as desired... for (i=0; i<str.length(); i++)
}} // fall through: \ escapes itself, quotes any character but u {
sb.append(c); pos = JSON_ESCAPABLES.indexOf(str.charAt(i));
} if (pos>=0)
return sb.toString(); {
} len = str.length();
str = str.substring(0, i) + "\\" + str.substring(i, len);
public static int getTibetanUnicodeStart(String unicode, int pos) i++;
{ }
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)>=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos; }
return -1; str = replace(str, "\b", "\\b");
} str = replace(str, "\f", "\\f");
str = replace(str, "\n", "\\n");
public static int getTibetanUnicodeEnd(String unicode, int pos) str = replace(str, "\r", "\\r");
{ str = replace(str, "\t", "\\t");
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)<TIBETAN_UNICODE_RANGE[0] || unicode.codePointAt(pos)>TIBETAN_UNICODE_RANGE[1]) return pos; return str;
return pos; }
}
public static boolean containsLetters(String str)
} {
int i=0;
if (str==null) return false;
while (i<str.length()) if (Character.isLetter(str.charAt(i++))) return true;
return false;
}
/** Decodes backslash escapes in a string.
    <ul>
    <li>A backslash followed by the letter u and four hexadecimal digits is
    replaced by the UTF-16 code unit those digits denote.</li>
    <li>A backslash followed by any other character yields that character
    (so a backslash escapes itself). A letter u that is not followed by
    four hexadecimal digits is treated this way as well, instead of
    raising a runtime exception.</li>
    <li>A lone backslash at the very end of the string is kept as is.</li>
    </ul>
    @param s text to decode; must not be null
    @return the decoded text */
public static String unescape(String s) {
    final int len = s.length();
    final StringBuilder sb = new StringBuilder(len);
    int i = 0;
    while (i < len) {
        char c = s.charAt(i++);
        if (c == '\\' && i < len) {
            c = s.charAt(i++);
            if (c == 'u') {
                // Accumulate the four hex digits by hand so malformed or
                // truncated input can be detected without an exception.
                boolean wellFormed = i + 4 <= len;
                int value = 0;
                for (int k = i; wellFormed && k < i + 4; k++) {
                    final int digit = Character.digit(s.charAt(k), 16);
                    if (digit < 0) {
                        wellFormed = false;
                    } else {
                        value = value * 16 + digit;
                    }
                }
                if (wellFormed) {
                    c = (char) value;
                    i += 4;
                }
                // Otherwise fall through: the u is emitted literally.
            }
        }
        sb.append(c);
    }
    return sb.toString();
}
/** Locates the beginning of the next run of characters whose code points
    fall within TIBETAN_UNICODE_RANGE (both bounds inclusive).
    @param unicode text to scan
    @param pos index from which scanning starts
    @return index of the first in-range code point at or after
    <code>pos</code>, or -1 when there is none */
public static int getTibetanUnicodeStart(String unicode, int pos)
{
    for (int cursor = pos; cursor < unicode.length(); cursor++)
    {
        final int current = unicode.codePointAt(cursor);
        if (current < TIBETAN_UNICODE_RANGE[0])
        {
            continue;
        }
        if (current <= TIBETAN_UNICODE_RANGE[1])
        {
            return cursor;
        }
    }
    return -1;
}
/** Locates the end of a run of characters whose code points fall within
    TIBETAN_UNICODE_RANGE (both bounds inclusive).
    @param unicode text to scan
    @param pos index from which scanning starts
    @return index of the first out-of-range code point at or after
    <code>pos</code>; when no such code point exists, the position at
    which scanning stopped */
public static int getTibetanUnicodeEnd(String unicode, int pos)
{
    int cursor = pos;
    for (; cursor < unicode.length(); cursor++)
    {
        final int current = unicode.codePointAt(cursor);
        final boolean belowRange = current < TIBETAN_UNICODE_RANGE[0];
        if (belowRange || current > TIBETAN_UNICODE_RANGE[1])
        {
            return cursor;
        }
    }
    return cursor;
}
}

View File

@ -1,465 +1,507 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.MissingResourceException; import java.util.MissingResourceException;
import java.util.ResourceBundle; import java.util.ResourceBundle;
import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import org.thdl.util.ThdlOptions; import org.thdl.util.ThdlOptions;
/** Interface to provide access to an on-line dictionary through a form in html; /** Interface to provide access to an on-line dictionary through a form in html;
Inputs Tibetan text (Roman script only) and displays the Inputs Tibetan text (Roman script only) and displays the
words (Roman or Tibetan script) with their definitions. words (Roman or Tibetan script) with their definitions.
Runs on the server and is called upon through an HTTP request directly Runs on the server and is called upon through an HTTP request directly
by the browser. Requires no additional software installed on the client. by the browser. Requires no additional software installed on the client.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
*/ */
public class OnLineScannerFilter extends HttpServlet public class OnLineScannerFilter extends HttpServlet
{ {
private final static String propertyFile = "dictionary"; private final static String propertyFile = "dictionary";
private final static String dictNameProperty = "onlinescannerfilter.dict-file-name"; private final static String dictNameProperty = "onlinescannerfilter.dict-file-name";
private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff"; private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff";
private final static String moreLinksProperty = "onlinescannerfilter.links-to-more-stuff"; private final static String moreLinksProperty = "onlinescannerfilter.links-to-more-stuff";
private final static String clearStr = "Clear"; private final static String smallerLinksProperty = "onlinescannerfilter.links-to-smaller-stuff";
private final static String buttonStr = "button"; private final static String clearStr = "Clear";
private final static String scriptStr = "script"; private final static String buttonStr = "button";
private final static String tibetanStr = "tibetan"; private final static String scriptStr = "script";
private final static String tibetanStr = "tibetan";
ResourceBundle rb;
private TibetanScanner scanner; ResourceBundle rb;
private String dictionaries[]; private TibetanScanner scanner;
private ScannerLogger sl; private String dictionaries[];
private ScannerLogger sl;
public OnLineScannerFilter() //throws Exception
{ public OnLineScannerFilter() //throws Exception
rb = ResourceBundle.getBundle(propertyFile); {
sl = new ScannerLogger(); System.setProperty("java.awt.headless","true");
rb = ResourceBundle.getBundle(propertyFile);
try sl = new ScannerLogger();
{
scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false); try
} {
catch (Exception e) scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false);
{ }
sl.writeLog("1\t1"); catch (Exception e)
sl.writeException(e); {
} sl.writeLog("1\t1");
sl.writeException(e);
dictionaries = scanner.getDictionaryDescriptions(); }
sl.writeLog("2\t1");
} dictionaries = scanner.getDictionaryDescriptions();
sl.writeLog("2\t1");
synchronized public void doGet(HttpServletRequest request, }
HttpServletResponse response) //throws IOException, ServletException
{ synchronized public void doGet(HttpServletRequest request,
String answer, parrafo = null, checkboxName; HttpServletResponse response) //throws IOException, ServletException
{
// if this line is included in the constructor, it works on the orion server but not on wyllie! String answer, parrafo = null, checkboxName;
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true); try
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true); {
request.setCharacterEncoding("UTF8");
response.setContentType("text/html"); }
PrintWriter out; catch(Exception e)
sl.setUserIP(request.getRemoteAddr()); {
// do nothing
try }
{ // if this line is included in the constructor, it works on the orion server but not on wyllie!
out = response.getWriter(); ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
} ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
catch (Exception e)
{ response.setContentType("text/html");
sl.writeLog("1\t1"); PrintWriter out;
sl.writeException(e); sl.setUserIP(request.getRemoteAddr());
return;
} try
{
BitDictionarySource ds=null; out = response.getWriter();
boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null); }
// int percent=100; catch (Exception e)
{
out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">"); sl.writeLog("1\t1");
out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">"); sl.writeException(e);
out.println("<head>"); return;
if (useTHDLBanner) }
{
out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>"); BitDictionarySource ds=null;
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"); boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null);
out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>"); // int percent=100;
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
} out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
else out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>"); out.println("<head>");
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">"); if (useTHDLBanner)
out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">"); {
out.println(" <meta name=\"MSSmartTagsPreventParsing\" content=\"TRUE\">"); out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Dictionary and Translation Tool</title>");
out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
answer = request.getParameter(scriptStr); out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
}
/* script==null || makes default tibetan else
script!=null && makes default roman {
*/ out.println(" <title>The Online Tibetan to English Dictionary and Translation Tool</title>");
wantsTibetan = (answer==null || answer.equals(tibetanStr)); out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"stylesheets/base.css\"/>");
/*if (wantsTibetan) }
{
out.println("<style>.tmw {font: 28pt TibetanMachineWeb}"); out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">");
out.println(".tmw1 {font: 28pt TibetanMachineWeb1}"); out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">");
out.println(".tmw2 {font: 28pt TibetanMachineWeb2}"); out.println(" <meta name=\"MSSmartTagsPreventParsing\" content=\"TRUE\">");
out.println(".tmw3 {font: 28pt TibetanMachineWeb3}");
out.println(".tmw4 {font: 28pt TibetanMachineWeb4}"); answer = request.getParameter(scriptStr);
out.println(".tmw5 {font: 28pt TibetanMachineWeb5}");
out.println(".tmw6 {font: 28pt TibetanMachineWeb6}"); /* script==null || makes default tibetan
out.println(".tmw7 {font: 28pt TibetanMachineWeb7}"); script!=null && makes default roman
out.println(".tmw8 {font: 28pt TibetanMachineWeb8}"); */
out.println(".tmw9 {font: 28pt TibetanMachineWeb9}"); wantsTibetan = (answer==null || answer.equals(tibetanStr));
out.println("</style>"); /*if (wantsTibetan)
}*/ {
out.println("</head>"); out.println("<style>.tmw {font: 28pt TibetanMachineWeb}");
out.println("<body>"); out.println(".tmw1 {font: 28pt TibetanMachineWeb1}");
out.println(".tmw2 {font: 28pt TibetanMachineWeb2}");
if (useTHDLBanner) out.println(".tmw3 {font: 28pt TibetanMachineWeb3}");
{ out.println(".tmw4 {font: 28pt TibetanMachineWeb4}");
out.println("<script type=\"text/javascript\" src=\"http://www.thdl.org/thdl/scripts/banner.js\"></script>"); out.println(".tmw5 {font: 28pt TibetanMachineWeb5}");
out.println("<div id=\"sub_banner\">"); out.println(".tmw6 {font: 28pt TibetanMachineWeb6}");
out.println("<div id=\"search\">"); out.println(".tmw7 {font: 28pt TibetanMachineWeb7}");
out.println(" <form method=\"get\" action=\"http://www.google.com/u/thdl\">"); out.println(".tmw8 {font: 28pt TibetanMachineWeb8}");
out.println(" <p>"); out.println(".tmw9 {font: 28pt TibetanMachineWeb9}");
out.println(" <input type=\"text\" name=\"q\" id=\"q\" size=\"15\" maxlength=\"255\" value=\"\" />"); out.println("</style>");
out.println(" <input type=\"submit\" name=\"sa\" id=\"sa\" value=\"Search\"/>"); }*/
out.println(" <input type=\"hidden\" name=\"hq\" id=\"hq\" value=\"inurl:orion.lib.virginia.edu\"/>"); out.println("</head>");
out.println(" </p>"); out.println("<body>");
out.println(" </form>");
out.println(" </div>"); if (useTHDLBanner)
out.println(" <div id=\"breadcrumbs\">"); {
out.println(" <a href=\"/thdl/index.html\">Home</a> &gt; <a href=\"/thdl/reference/index.html\">Reference</a> &gt; Translation Tool"); out.println("<script type=\"text/javascript\" src=\"http://www.thdl.org/thdl/scripts/banner.js\"></script>");
out.println(" </div>"); out.println("<div id=\"sub_banner\">");
out.println("</div><!--END sub_banner-->"); out.println("<div id=\"search\">");
out.println("<div id=\"main\">"); out.println(" <form method=\"get\" action=\"http://www.google.com/u/thdl\">");
} out.println(" <p>");
out.println(" <input type=\"text\" name=\"q\" id=\"q\" size=\"15\" maxlength=\"255\" value=\"\" />");
out.println("<h3 align=\"center\">The Online Tibetan to English Translation/Dictionary Tool</h3>"); out.println(" <input type=\"submit\" name=\"sa\" id=\"sa\" value=\"Search\"/>");
out.println(" <input type=\"hidden\" name=\"hq\" id=\"hq\" value=\"inurl:orion.lib.virginia.edu\"/>");
try out.println(" </p>");
{ out.println(" </form>");
out.println(rb.getString(otherLinksProperty)); out.println(" </div>");
} out.println(" <div id=\"breadcrumbs\">");
catch (MissingResourceException e) out.println(" <a href=\"/thdl/index.html\">Home</a> &gt; <a href=\"/thdl/reference/index.html\">Reference</a> &gt; Translation Tool");
{ out.println(" </div>");
// do nothing out.println("</div><!--END sub_banner-->");
} out.println("<div id=\"main\">");
}
if (useTHDLBanner) try
{ {
out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter?thdlBanner=on\" method=POST>"); out.println(rb.getString(otherLinksProperty));
} }
else catch (MissingResourceException e)
{ {
out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter\" method=POST>"); // do nothing
} }
out.println("<table border=\"0\" width=\"100%\">");
out.println(" <tr>"); if (useTHDLBanner)
out.println(" <td width=\"25%\">"); {
out.println(" <p>Display results in:</td>"); out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter?thdlBanner=on\" method=POST>");
out.println(" <td width=\"75%\">"); }
out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" "); else
if (wantsTibetan) out.println("checked "); {
out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://www.thdl.org/xml/show.php?xml=/tools/tibfonts.xml&l=uva10928423419921\" target=\"_blank\">Tibetan Machine Uni font</a>)<br/>"); out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter\" method=POST>");
out.println(" <input type=\"radio\" value=\"roman\" "); }
if (!wantsTibetan) out.println("checked "); out.println("<table border=\"0\" width=\"100%\">");
out.println("name=\"" + scriptStr + "\">Roman script</td>"); out.println(" <tr>");
out.println(" </tr>"); out.println(" <td width=\"18%\" align=\"left\"><strong>Display results in:</strong></td>");
out.println("</table>"); out.println(" <td width=\"41%\" align=\"right\">");
out.println(" <input type=\"radio\" value=\"" + tibetanStr + "\" ");
if (dictionaries!=null) if (wantsTibetan) out.println("checked ");
{ out.println("name=\"" + scriptStr + "\">Tibetan script (<a href=\"http://www.thlib.org/tools/#wiki=/access/wiki/site/26a34146-33a6-48ce-001e-f16ce7908a6a/tibetan%20machine%20uni.html\" target=\"_top\">Tibetan Machine Uni</a> font)</td>");
int i; out.println(" <td width=\"16%\" align=\"left\">");
ds = scanner.getDictionarySource(); out.println(" <input type=\"radio\" value=\"roman\" ");
ds.reset(); if (!wantsTibetan) out.println("checked ");
checkedDicts = new boolean[dictionaries.length]; out.println("name=\"" + scriptStr + "\">Roman script</td>");
/* out.println(" <tr>"); out.println(" <td width=\"25%\" align=\"right\">");
out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/ out.println("<a href=\"http://www.thlib.org/tools/#wiki=/access/wiki/site/c06fa8cf-c49c-4ebc-007f-482de5382105/tibetan%20translation%20tool.html\" target=\"_top\">Help & Offline Installation</a></td>");
out.println("<p>Search in dictionaries: "); out.println(" </tr>");
allUnchecked=true; if (dictionaries!=null)
for (i=0; i<dictionaries.length; i++) {
{ int i;
checkboxName = "dict"+ i; ds = scanner.getDictionarySource();
checkedDicts[i] = (request.getParameter(checkboxName)!=null); ds.reset();
} checkedDicts = new boolean[dictionaries.length];
allUnchecked=true; /* out.println(" <tr>");
for (i=0; i<dictionaries.length; i++) out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/
{ out.println("<tr><td colspan=\"4\"><strong>Search in dictionaries: </strong>");
if(checkedDicts[i]) allUnchecked=true;
{ for (i=0; i<dictionaries.length; i++)
allUnchecked=false; {
break; checkboxName = "dict"+ i;
} checkedDicts[i] = (request.getParameter(checkboxName)!=null);
} }
allUnchecked=true;
if (allUnchecked) for (i=0; i<dictionaries.length; i++)
{ {
for (i=0; i<dictionaries.length; i++) if(checkedDicts[i])
checkedDicts[i] = true; {
} allUnchecked=false;
break;
for (i=0; i<dictionaries.length; i++) }
{ }
checkboxName = "dict"+ i;
// out.print(" <td width=\"" + percent + "%\">"); if (allUnchecked)
out.print("<input type=\"checkbox\" name=\"" + checkboxName +"\" value=\""+ checkboxName +"\""); {
if (checkedDicts[i]) for (i=0; i<dictionaries.length; i++)
{ checkedDicts[i] = true;
out.print(" checked"); }
ds.add(i);
} for (i=0; i<dictionaries.length; i++)
if (dictionaries[i]!=null) {
out.print(">" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ")&nbsp;&nbsp;&nbsp;"); checkboxName = "dict"+ i;
else // out.print(" <td width=\"" + percent + "%\">");
out.print(">" + DictionarySource.defTags[i] + "&nbsp;&nbsp;&nbsp;"); out.print("<input type=\"checkbox\" name=\"" + checkboxName +"\" value=\""+ checkboxName +"\"");
// out.println(" + "</td>"); if (checkedDicts[i])
} {
// out.println(" </tr>"); out.print(" checked");
} ds.add(i);
// fix for updates }
else ds = BitDictionarySource.getAllDictionaries(); if (dictionaries[i]!=null)
// out.println("</table>"); out.print(">" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ")&nbsp;&nbsp;&nbsp;");
out.println("</p>"); else
out.println("<table border=\"0\" width=\"100%\">"); out.print(">" + DictionarySource.defTags[i] + "&nbsp;&nbsp;&nbsp;");
out.println(" <tr>"); // out.println(" + "</td>");
out.println(" <td width=\"35%\">"); }
out.println(" <p><strong>Input text:</strong></p>"); out.println(" </td></tr>");
out.println(" </td>"); }
out.println(" <td width=\"65%\">"); // fix for updates
out.println(" <p> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></p>"); else ds = BitDictionarySource.getAllDictionaries();
out.println(" </td>"); // out.println("</table>");
out.println(" </tr>"); // out.println("</p>");
out.println("</table>"); // out.println("<table border=\"0\" width=\"100%\">");
out.println(" <tr>");
out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\""); out.println(" <td><strong>Input text:</strong></td>");
if (wantsTibetan) out.print(" class=\"tib\""); out.println(" <td><input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></td>");
out.println(">"); out.println(" <td colspan\"2\">&nbsp;</td");
out.println(" </tr>");
// Paragraph should be empty if the user just clicked the clear button out.println("</table>");
answer = request.getParameter(buttonStr); answer = request.getParameter(buttonStr);
if (answer == null || answer != null && !answer.equals(clearStr)) String smallerLinks=null;
{ if (answer == null || answer != null && !answer.equals(clearStr))
parrafo = request.getParameter("parrafo"); {
if (parrafo!=null) out.print(parrafo); parrafo = request.getParameter("parrafo");
} }
if (parrafo==null)
out.println("</textarea>"); {
out.println("</form>"); try
try {
{ smallerLinks = rb.getString(smallerLinksProperty);
out.println(rb.getString(moreLinksProperty)); }
} catch (MissingResourceException e)
catch (MissingResourceException e) {
{ // do nothing
// do nothing }
}
}
if (parrafo != null) if (smallerLinks!=null)
{ {
sl.writeLog("4\t1"); out.println("<table width=\"100%\">");
if (ds!=null && !ds.isEmpty()) out.println("<tr>");
desglosar(parrafo, out, wantsTibetan); out.println("<td>");
} }
else sl.writeLog("3\t1");
out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
out.println(TibetanScanner.copyrightHTML); if (wantsTibetan) out.print(" class=\"tib\"");
if (useTHDLBanner) out.println("</div><!--END main-->"); out.println(">");
out.println("</body>");
out.println("</html>"); // Paragraph should be empty if the user just clicked the clear button
} answer = request.getParameter(buttonStr);
if (parrafo!=null)
public void doPost(HttpServletRequest request, {
HttpServletResponse response) out.print(parrafo);
//throws IOException, ServletException }
{ out.println("</textarea>");
doGet(request, response); if (smallerLinks!=null)
} {
out.println("</td>");
synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan) out.println("<td>");
{ out.println(smallerLinks);
//boolean hayMasLineas=true; out.println("</td>");
//int init = 0, fin; out.println("</tr>");
//String linea; out.println("</table>");
Object words[]; }
if (!in.equals("")) out.println("</form>");
{
/* while (hayMasLineas) if (parrafo != null)
{ {
fin = in.indexOf("\n",init); sl.writeLog("4\t1");
if (fin<0) if (ds!=null && !ds.isEmpty())
{ {
linea = in.substring(init).trim(); desglosar(parrafo, out, wantsTibetan);
hayMasLineas=false; }
} }
else else sl.writeLog("3\t1");
linea = in.substring(init, fin).trim();
out.println(TibetanScanner.copyrightHTML);
scanner.scanBody(linea); if (useTHDLBanner) out.println("</div><!--END main-->");
out.println("</body>");
init = fin+1; out.println("</html>");
} */ }
scanner.clearTokens();
in = Manipulate.NCR2UnicodeString(in); public void doPost(HttpServletRequest request,
if (Manipulate.guessIfUnicode(in)) in = BasicTibetanTranscriptionConverter.unicodeToWylie(in); HttpServletResponse response)
else if (Manipulate.guessIfAcip(in)) in = BasicTibetanTranscriptionConverter.acipToWylie(in); //throws IOException, ServletException
scanner.scanBody(in); {
scanner.finishUp(); doGet(request, response);
printText(pw, tibetan); }
printAllDefs(pw, tibetan);
scanner.clearTokens(); synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan)
} {
} //boolean hayMasLineas=true;
//int init = 0, fin;
public void printText(PrintWriter pw, boolean tibetan) String tmp;
{ Object words[];
Token words[] = scanner.getTokenArray();
SwingWord word; if (!in.equals(""))
char pm; {
int i; /* while (hayMasLineas)
{
if (words==null) return; fin = in.indexOf("\n",init);
if (fin<0)
pw.print("<p>"); {
for (i=0; i < words.length; i++) linea = in.substring(init).trim();
{ hayMasLineas=false;
}
if (words[i] instanceof Word) else
{ linea = in.substring(init, fin).trim();
word = new SwingWord((Word)words[i]);
// if (word.getDefs().getDictionarySource()!=null) scanner.scanBody(linea);
pw.print(word.getLink(tibetan));
// else pw.print(word.getWylie() + " "); init = fin+1;
} } */
else scanner.clearTokens();
{ in = Manipulate.NCR2UnicodeString(in);
if (words[i] instanceof PunctuationMark) if (Manipulate.guessIfUnicode(in)) in = BasicTibetanTranscriptionConverter.unicodeToWylie(in);
{ else if (Manipulate.guessIfAcip(in)) in = BasicTibetanTranscriptionConverter.acipToWylie(in);
pm = words[i].toString().charAt(0); scanner.scanBody(in);
switch (pm) scanner.finishUp();
{ printText(pw, tibetan);
case '\n': try
pw.println("</p>"); {
pw.print("<p>"); tmp = rb.getString(moreLinksProperty);
break; pw.println("<p>");
case '<': pw.println(tmp);
pw.print("&lt; "); pw.println("</p>");
break; }
case '>': catch (MissingResourceException e)
pw.print("&gt; "); {
break; // do nothing
default: }
pw.print(pm + " "); printAllDefs(pw, tibetan);
} scanner.clearTokens();
} }
} }
}
pw.println("</p>"); public void printText(PrintWriter pw, boolean tibetan)
} {
Token words[] = scanner.getTokenArray();
public void printAllDefs(PrintWriter pw, boolean tibetan) { SwingWord word;
int i, j, k=0; char pm;
Word words[]; int i;
SwingWord word = null;
Definitions defs; if (words==null) return;
String tag;
DictionarySource ds; pw.print("<p>");
ByteDictionarySource sourceb=null; for (i=0; i < words.length; i++)
{
words = scanner.getWordArray(false);
if (words[i] instanceof Word)
if (words == null) {
return; word = new SwingWord((Word)words[i]);
pw.println("<table border=\"1\" width=\"100%\">"); // if (word.getDefs().getDictionarySource()!=null)
pw.print(word.getLink(tibetan));
for (j = 0; j < words.length; j++) { // else pw.print(word.getWylie() + " ");
try { }
else
word = new SwingWord(words[j]); {
defs = word.getDefs(); if (words[i] instanceof PunctuationMark)
ds = defs.getDictionarySource(); {
pw.println(" <tr>"); pm = words[i].toString().charAt(0);
if (ds == null) { switch (pm)
tag = "&nbsp;"; {
} case '\n':
else { pw.println("</p>");
if (FileSyllableListTree.versionNumber==2) { pw.print("<p>");
tag = ds.getTag(0); break;
} case '<':
else { pw.print("&lt; ");
sourceb = (ByteDictionarySource) ds; break;
k=0; case '>':
while (sourceb.isEmpty(k)) k++; pw.print("&gt; ");
tag = sourceb.getTag(k); break;
k++; default:
} pw.print(pm + " ");
} }
}
pw.print(" <td width=\"20%\" rowspan=\"" + defs.def.length }
+ "\" valign=\"top\""); }
if (tibetan) pw.print(" class=\"tib\""); pw.println("</p>");
pw.println(">" + word.getBookmark(tibetan) + "</td>"); }
pw.println(" <td width=\"12%\">" + tag + "</td>");
pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>"); public void printAllDefs(PrintWriter pw, boolean tibetan) {
int i, j, k=0;
pw.println(" </tr>"); Word words[];
for (i = 1; i < defs.def.length; i++) { SwingWord word = null;
pw.println(" <tr>"); Definitions defs;
String tag;
if (FileSyllableListTree.versionNumber==2) { DictionarySource ds;
tag = ds.getTag(i); ByteDictionarySource sourceb=null;
}
else { words = scanner.getWordArray(false);
while (sourceb.isEmpty(k)) k++;
tag = sourceb.getTag(k); if (words == null) return;
k++; pw.println("<table border=\"1\" width=\"100%\">");
}
for (j = 0; j < words.length; j++) {
pw.println(" <td width=\"12%\">" + tag + "</td>"); try {
pw.println(" <td width=\"68%\">" + defs.def[i] + "</td>");
//else pw.println(" <td width=\"80%\" colspan=\"2\">" + defs.def[i] + "</td>"); word = new SwingWord(words[j]);
pw.println(" </tr>"); defs = word.getDefs();
} ds = defs.getDictionarySource();
} catch (Exception e) { pw.println(" <tr>");
sl.writeLog("1\t1\t" + word.getWylie()); if (ds == null) {
sl.writeException(e); tag = "&nbsp;";
} }
else {
} if (FileSyllableListTree.versionNumber==2) {
pw.println("</table>"); tag = ds.getTag(0);
} }
else {
public void destroy() sourceb = (ByteDictionarySource) ds;
{ k=0;
super.destroy(); while (sourceb.isEmpty(k)) k++;
sl.setUserIP(null); tag = sourceb.getTag(k);
sl.writeLog("5\t1"); k++;
scanner.destroy(); }
} }
} pw.print(" <td width=\"20%\" rowspan=\"" + defs.def.length
+ "\" valign=\"top\"");
if (tibetan) pw.print(" class=\"tib\"");
pw.println(">" + word.getBookmark(tibetan) + "</td>");
pw.println(" <td width=\"12%\">" + tag + "</td>");
pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>");
pw.println(" </tr>");
for (i = 1; i < defs.def.length; i++) {
pw.println(" <tr>");
if (FileSyllableListTree.versionNumber==2) {
tag = ds.getTag(i);
}
else {
while (sourceb.isEmpty(k)) k++;
tag = sourceb.getTag(k);
k++;
}
pw.println(" <td width=\"12%\">" + tag + "</td>");
pw.println(" <td width=\"68%\">" + defs.def[i] + "</td>");
//else pw.println(" <td width=\"80%\" colspan=\"2\">" + defs.def[i] + "</td>");
pw.println(" </tr>");
}
} catch (Exception e) {
sl.writeLog("1\t1\t" + word.getWylie());
sl.writeException(e);
}
}
pw.println("</table>");
}
public void destroy()
{
super.destroy();
sl.setUserIP(null);
sl.writeLog("5\t1");
scanner.destroy();
}
}

View File

@ -1,162 +1,206 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.ResourceBundle; import java.util.ResourceBundle;
import javax.servlet.GenericServlet; import javax.servlet.GenericServlet;
import javax.servlet.ServletRequest; import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse; import javax.servlet.ServletResponse;
/** Running on the server, receives the tibetan text from applet/applications running on /** Running on the server, receives the tibetan text from applet/applications running on
the client and sends them the words with their definitions through the Internet. the client and sends them the words with their definitions through the Internet.
Requests are made through {@link RemoteTibetanScanner}. Requests are made through {@link RemoteTibetanScanner}.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
@see RemoteTibetanScanner @see RemoteTibetanScanner
*/ */
public class RemoteScannerFilter extends GenericServlet public class RemoteScannerFilter extends GenericServlet
{ {
private TibetanScanner scanner; private TibetanScanner scanner;
private BitDictionarySource ds; private BitDictionarySource ds;
private ScannerLogger sl; private ScannerLogger sl;
private static final int INTERNAL = 1;
public RemoteScannerFilter() private static final int JSON = 2;
{
ResourceBundle rb = ResourceBundle.getBundle("dictionary"); public RemoteScannerFilter()
sl = new ScannerLogger(); {
System.setProperty("java.awt.headless","true");
try ResourceBundle rb = ResourceBundle.getBundle("dictionary");
{ sl = new ScannerLogger();
scanner = new LocalTibetanScanner(rb.getString("onlinescannerfilter.dict-file-name"),false);
} try
catch (Exception e) {
{ scanner = new LocalTibetanScanner(rb.getString("onlinescannerfilter.dict-file-name"),false);
sl.writeLog("1\t2"); }
sl.writeException(e); catch (Exception e)
} {
ds = scanner.getDictionarySource(); sl.writeLog("1\t2");
sl.writeLog("Creation\t2"); sl.writeException(e);
} }
scanner.getDictionaryDescriptions();
public void service(ServletRequest req, ServletResponse res) //throws ServletException, IOException ds = scanner.getDictionarySource();
{ sl.writeLog("Creation\t2");
BufferedReader br; }
res.setContentType ("text/plain");
sl.setUserIP(req.getRemoteAddr()); public void service(ServletRequest req, ServletResponse res) //throws ServletException, IOException
{
Word word = null, words[] = null; BufferedReader br;
PrintWriter out; int format, i, j, k;
try
try {
{ req.setCharacterEncoding("UTF8");
out = res.getWriter(); }
} catch(Exception e)
catch (Exception e) {
{ // do nothing
sl.writeLog("1\t2"); }
sl.writeException(e); String linea, dicts = req.getParameter("dicts"), dicDescrip[], jwf = req.getParameter("jwf"), tag;
return; Definitions defs;
} ByteDictionarySource dict_source;
if (jwf!=null) format = JSON;
int i; else format = INTERNAL;
String linea, dicts = req.getParameter("dicts"), dicDescrip[]; switch (format)
{
if (dicts!=null) case INTERNAL:
{ res.setContentType ("text/plain");
if (dicts.equals("names")) break;
{ case JSON:
sl.writeLog("3\t2"); res.setContentType ("text/x-json");
dicDescrip = scanner.getDictionaryDescriptions(); }
if (dicDescrip==null) sl.setUserIP(req.getRemoteAddr());
{
out.close(); Word word = null, words[] = null;
return; PrintWriter out;
}
try
for (i=0; i<dicDescrip.length; i++) {
{ out = res.getWriter();
out.println(dicDescrip[i] + "," + DictionarySource.defTags[i]); }
} catch (Exception e)
out.close(); {
return; sl.writeLog("1\t2");
} sl.writeException(e);
else return;
{ }
ds.setDicts(Integer.parseInt(dicts));
}
} if (dicts!=null)
{
try if (dicts.equals("names"))
{ {
br = new BufferedReader(new InputStreamReader(req.getInputStream())); sl.writeLog("3\t2");
} dicDescrip = scanner.getDictionaryDescriptions();
catch (Exception e) if (dicDescrip==null)
{ {
sl.writeLog("1\t2"); out.close();
sl.writeException(e); return;
return; }
}
for (i=0; i<dicDescrip.length; i++)
{
/* FIXME: sometimes getDef raises a NullPointerException. out.println(dicDescrip[i] + "," + DictionarySource.defTags[i]);
In the meantime, I'll just keep it from crashing }
*/ out.close();
sl.writeLog("4\t2"); return;
}
try else
{ {
scanner.clearTokens(); ds.setDicts(Integer.parseInt(dicts));
while((linea = br.readLine())!= null) }
scanner.scanLine(linea); }
br.close(); if (format==JSON)
{
scanner.finishUp(); out.println(jwf + "({\"words\":{");
words = scanner.getWordArray(); }
try
for (i=0; i<words.length; i++) {
{ scanner.clearTokens();
linea = words[i].getDef(); switch (format)
if (linea == null) continue; {
out.println(words[i].getWylie()); case INTERNAL:
out.println(linea); br = req.getReader();
out.println(); sl.writeLog("4\t2");
} while((linea = br.readLine())!= null)
} scanner.scanLine(linea);
catch (Exception e) br.close();
{ break;
sl.writeLog("1\t2\t" + word.getWylie()); case JSON:
sl.writeException(e); linea = req.getParameter("text");
} linea = Manipulate.NCR2UnicodeString(linea);
if (Manipulate.guessIfUnicode(linea)) linea = BasicTibetanTranscriptionConverter.unicodeToWylie(linea);
scanner.clearTokens(); else if (Manipulate.guessIfAcip(linea)) linea = BasicTibetanTranscriptionConverter.acipToWylie(linea);
out.close(); scanner.scanLine(linea);
} }
scanner.finishUp();
public void destroy() words = scanner.getWordArray();
{
super.destroy(); for (i=0; i<words.length; i++)
sl.setUserIP(null); {
sl.writeLog("5\t2"); linea = words[i].getDef();
scanner.destroy(); if (linea == null) continue;
} switch (format)
{
case INTERNAL:
out.println(words[i].getWylie());
out.println(linea);
out.println();
break;
case JSON:
out.println("\"" + BasicTibetanTranscriptionConverter.wylieToHTMLUnicode(words[i].token) + "\": [");
defs = words[i].getDefs();
dict_source = (ByteDictionarySource)defs.getDictionarySource();
k=0;
for (j=0; j<defs.def.length; j++)
{
while (dict_source.isEmpty(k)) k++;
tag = dict_source.getTag(k);
k++;
out.println("\"" + tag + "\",");
out.print("\"" + Manipulate.toJSON(defs.def[j]) + "\"");
if (j==defs.def.length-1) out.println();
else out.println(",");
}
out.print("]");
if (i<words.length-1) out.println(",");
}
}
if (format==JSON) out.println("}});");
}
catch (Exception e)
{
sl.writeLog("1\t2\t" + word.getWylie());
sl.writeException(e);
}
scanner.clearTokens();
out.close();
}
public void destroy()
{
super.destroy();
sl.setUserIP(null);
sl.writeLog("5\t2");
scanner.destroy();
}
} }

View File

@ -1,86 +1,93 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Calendar; import java.util.Calendar;
import java.util.ResourceBundle; import java.util.ResourceBundle;
/** Designed to keep a log of the transactions taking place in the
    servlet version of the translation tool.

    <p>Configuration is read from the <code>dictionary</code> resource
    bundle: <code>remotescannerfilter.log-file-name</code> names the file
    that entries are appended to, and
    <code>remotescannerfilter.logging-enabled</code> must be "yes" (in any
    letter case) for anything to be written. When the latter key is absent,
    logging is disabled.</p>

    @author Andr&eacute;s Montano Pellegrini
*/
public class ScannerLogger
{
    /** Path of the log file; every entry is appended to it. */
    private String fileName;

    /** IP recorded by the latest call to {@link #setUserIP}, or null if none. */
    private String lastIP;

    /** Whether {@link #writeLog} and {@link #writeException} write anything. */
    private boolean enabled;

    /** Loads the logger configuration from the <code>dictionary</code> bundle.
        @throws java.util.MissingResourceException if the bundle or the
        log-file-name key cannot be found
    */
    public ScannerLogger()
    {
        ResourceBundle rb = ResourceBundle.getBundle("dictionary");
        fileName = rb.getString("remotescannerfilter.log-file-name");
        enabled = isLoggingEnabled(rb);
        lastIP = null;
    }

    /** Reads the logging switch from the bundle. ResourceBundle.getString
        never returns null for a missing key, it throws instead, so the
        absence of the key has to be detected through the exception. */
    private static boolean isLoggingEnabled(ResourceBundle rb)
    {
        try
        {
            return "yes".equalsIgnoreCase(rb.getString("remotescannerfilter.logging-enabled"));
        }
        catch (java.util.MissingResourceException e)
        {
            // Key not configured: treat as "logging disabled".
            return false;
        }
    }

    /** Returns the current local time as six tab-separated integers:
        year, month, day of month, hour (0-23), minute and second.
        The month is the raw Calendar.MONTH value, which is zero-based
        (January is 0); it is kept that way so that the log format does
        not change.
        @return the tab-separated time stamp
    */
    public String getCurrentTime()
    {
        Calendar now = Calendar.getInstance();
        return now.get(Calendar.YEAR) + "\t"
            + now.get(Calendar.MONTH) + "\t"
            + now.get(Calendar.DAY_OF_MONTH) + "\t"
            + now.get(Calendar.HOUR_OF_DAY) + "\t"
            + now.get(Calendar.MINUTE) + "\t"
            + now.get(Calendar.SECOND);
    }

    /** Sets the IP address that prefixes subsequent log entries.
        @param lastIP address to record; null makes entries start with "-"
    */
    public void setUserIP(String lastIP)
    {
        this.lastIP = lastIP;
    }

    /** Appends one line to the log: the user IP (or "-"), the current time
        and the given text, separated by tabs. Does nothing when logging is
        disabled or when the log file cannot be opened.
        @param s text to log
    */
    public synchronized void writeLog(String s)
    {
        if (!enabled) return;
        PrintStream pw = getPrintStream();
        // getPrintStream has already reported the failure; a log problem
        // must not break the request being served.
        if (pw == null) return;
        try
        {
            pw.println((lastIP != null ? lastIP : "-") + "\t" + getCurrentTime() + "\t" + s);
            pw.flush();
        }
        finally
        {
            pw.close();
        }
    }

    /** Opens the log file for appending.
        NOTE(review): the stream uses the platform default charset, so
        non-ASCII text may not survive on every platform - confirm whether
        an explicit encoding is wanted.
        @return a stream on the log file, or null if it could not be opened
        (the cause is printed to standard error)
    */
    private PrintStream getPrintStream()
    {
        try
        {
            return new PrintStream(new FileOutputStream(fileName, true));
        }
        catch (Exception e)
        {
            e.printStackTrace();
            return null;
        }
    }

    /** Appends the stack trace of the given exception to the log. Does
        nothing when logging is disabled or when the log file cannot be
        opened.
        @param e exception whose stack trace is logged
    */
    public synchronized void writeException(Exception e)
    {
        if (!enabled) return;
        PrintStream pw = getPrintStream();
        if (pw == null) return;
        try
        {
            e.printStackTrace(pw);
            pw.flush();
        }
        finally
        {
            pw.close();
        }
    }
}

View File

@ -1,257 +1,254 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import org.thdl.util.SimplifiedLinkedList; import org.thdl.util.SimplifiedLinkedList;
import org.thdl.util.SimplifiedListIterator; import org.thdl.util.SimplifiedListIterator;
import org.thdl.util.ThdlVersion; import org.thdl.util.ThdlVersion;
/** Defines the core methods required to provide access to a dictionary; local or remote. /** Defines the core methods required to provide access to a dictionary; local or remote.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
*/ */
public abstract class TibetanScanner public abstract class TibetanScanner
{ {
public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-200??6 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2009 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2006 by Andres Montano Pellegrini, all rights reserved."; public static final String copyrightASCII="Copyright 2000-2009 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2006 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>"; public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2009 by <a href=\"http://www.gaugeus.com/ramblings\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a> All rights reserved.</strong></small>";
public static final int NORMAL_MODE=1; public static final int NORMAL_MODE=1;
public static final int DEBUG_MODE=2; public static final int DEBUG_MODE=2;
public static int mode; public static int mode;
static static
{ {
mode = NORMAL_MODE; mode = NORMAL_MODE;
} }
public static final String aboutTomeraider= public static final String aboutTomeraider=
"Welcome to Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary version 2.0.0!<p>\n" + "Welcome to Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary version 2.0.0!<p>\n" +
"This file was automatically generated using software developed by Andres Montano Pellegrini. " + "This file was automatically generated using software developed by Andres Montano Pellegrini. " +
"For more information, see http://www.people.virginia.edu/~am2zb/tibetan .<p>" + "For more information, see http://www.people.virginia.edu/~am2zb/tibetan .<p>" +
"<b>Formulator and Editor</b>: Jeffrey Hopkins<br>\n" + "<b>Formulator and Editor</b>: Jeffrey Hopkins<br>\n" +
"<b>Contributors</b>: Joe Wilson, Craig Preston, John Powers, Nathanial Garson, " + "<b>Contributors</b>: Joe Wilson, Craig Preston, John Powers, Nathanial Garson, " +
"Paul Hackett, Andres Montano<p>" + "Paul Hackett, Andres Montano<p>" +
"A project of the Tibetan Studies Institute, Boonesville, Virginia, and the " + "A project of the Tibetan Studies Institute, Boonesville, Virginia, and the " +
"University of Virginia Tibetan Studies Program<p>" + "University of Virginia Tibetan Studies Program<p>" +
"<i>\u00A9 Jeffrey Hopkins 1992.</i><p>" + "<i>\u00A9 Jeffrey Hopkins 1992.</i><p>" +
"<b>Apology</b><p>" + "<b>Apology</b><p>" +
"This is a work in progress in crude form that is being shared with students " + "This is a work in progress in crude form that is being shared with students " +
"of the Tibetan language mainly in order to receive input for further " + "of the Tibetan language mainly in order to receive input for further " +
"development. The English translations of the entries can be said only to " + "development. The English translations of the entries can be said only to " +
"represent what contributors, over a span of over thirty years, thought were " + "represent what contributors, over a span of over thirty years, thought were " +
"my current translations. A small number are simply wrong; others need to be " + "my current translations. A small number are simply wrong; others need to be " +
"updated; and all will receive much more attention and, hence, detail.<p>\n" + "updated; and all will receive much more attention and, hence, detail.<p>\n" +
"The Dictionary has been entered into a database with fields for the entry, " + "The Dictionary has been entered into a database with fields for the entry, " +
"Sanskrit, tenses, my English, a few others interests, examples, " + "Sanskrit, tenses, my English, a few others interests, examples, " +
"definition, divisions, and comments. At this point, very few entries " + "definition, divisions, and comments. At this point, very few entries " +
"contain all of these items, but the plan is provide these, where " + "contain all of these items, but the plan is provide these, where " +
"appropriate, over the years. Translations for entries that have arisen from " + "appropriate, over the years. Translations for entries that have arisen from " +
"my work and from interactions with my students are in boldface, whereas " + "my work and from interactions with my students are in boldface, whereas " +
"those from other works are in regular type on separate lines and are marked " + "those from other works are in regular type on separate lines and are marked " +
"with an initial at the end of the line. A key to these markings is given on " + "with an initial at the end of the line. A key to these markings is given on " +
"the next page.<p>\n" + "the next page.<p>\n" +
"(Please note that the radical signs for Sanskrit roots are, after the first" + "(Please note that the radical signs for Sanskrit roots are, after the first" +
"letter of the alphabet, in a state of disarray.)<p>\n" + "letter of the alphabet, in a state of disarray.)<p>\n" +
"I hope that you will bear with the many inadequacies of this first release.<p>\n" + "I hope that you will bear with the many inadequacies of this first release.<p>\n" +
"Paul Jeffrey Hopkins<br>\n" + "Paul Jeffrey Hopkins<br>\n" +
"Professor of Tibetan Studies<p>\n" + "Professor of Tibetan Studies<p>\n" +
"<b>Abbreviations</b><p>\n" + "<b>Abbreviations</b><p>\n" +
"B-7: ??? {PH: see dngos med ... & dngos po (synonyms) }<p>\n" + "B-7: ??? {PH: see dngos med ... & dngos po (synonyms) }<p>\n" +
"BJ: Bel-jor-hlun-drup (Dpal \'byor lhun grub). Legs bshad snying po\'i dka' " + "BJ: Bel-jor-hlun-drup (Dpal \'byor lhun grub). Legs bshad snying po\'i dka' " +
"\'grel bstan pa\'i sgron me (Buxaduar: Sera Monastery, 1968).<p>\n" + "\'grel bstan pa\'i sgron me (Buxaduar: Sera Monastery, 1968).<p>\n" +
"BK: ??? {PH: see bka\' (examples) }<p>\n" + "BK: ??? {PH: see bka\' (examples) }<p>\n" +
"BR: Losang Gyatso (Blo bzang rgya mtsho). Presentation of Knowledge and " + "BR: Losang Gyatso (Blo bzang rgya mtsho). Presentation of Knowledge and " +
"Awareness (Blo rig).<p>\n" + "Awareness (Blo rig).<p>\n" +
"BWT: Ngak-wang-bel-den (Ngag dbang dpal ldan). Annotations for " + "BWT: Ngak-wang-bel-den (Ngag dbang dpal ldan). Annotations for " +
"[Jam-yang-shay-ba\'s] \"Tenets\" (Grub mtha\' chen mo\'i mchan).<p>\n" + "[Jam-yang-shay-ba\'s] \"Tenets\" (Grub mtha\' chen mo\'i mchan).<p>\n" +
"C: Conze, Edward. Materials for a Dictionary of the Prajnaparamita " + "C: Conze, Edward. Materials for a Dictionary of the Prajnaparamita " +
"Literature (Tokyo: Suzuki Research Foundation, 1967).<p>\n" + "Literature (Tokyo: Suzuki Research Foundation, 1967).<p>\n" +
"col.: colloquial<p>\n" + "col.: colloquial<p>\n" +
"D1: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " + "D1: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
"(Part 1: Bsdus grwa chung ngu).<p>\n" + "(Part 1: Bsdus grwa chung ngu).<p>\n" +
"D2: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " + "D2: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
"(Part 2: Bsdus grwa \'bring).<p>\n" + "(Part 2: Bsdus grwa \'bring).<p>\n" +
"DASI: Decisive Analysis of Special Insight.<p>\n" + "DASI: Decisive Analysis of Special Insight.<p>\n" +
"DG: Germano, David. Poetic Thought, the Intelligent Universe, and the " + "DG: Germano, David. Poetic Thought, the Intelligent Universe, and the " +
"Mystery of Self: the Tantric Synthesis of rDzogs Chen in Fourteenth Century " + "Mystery of Self: the Tantric Synthesis of rDzogs Chen in Fourteenth Century " +
"Tibet. (Ph.d. dissertation, University of Wisconsin, Madison,WI 1992).<p>\n" + "Tibet. (Ph.d. dissertation, University of Wisconsin, Madison,WI 1992).<p>\n" +
"DK: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Drang ba dang nges pa\'i " + "DK: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Drang ba dang nges pa\'i " +
"don rnam par phye ba'i bstan bcos legs bshad snying po (Sarnath: Pleasure of " + "don rnam par phye ba'i bstan bcos legs bshad snying po (Sarnath: Pleasure of " +
"Elegant Sayings Press, 1979).<p>\n" + "Elegant Sayings Press, 1979).<p>\n" +
"Ganden Triba: Oral commentary of Ganden Triba Jam-bel-shen-pen.<p>\n" + "Ganden Triba: Oral commentary of Ganden Triba Jam-bel-shen-pen.<p>\n" +
"GCT: Ngak-wang-dra-shi (Ngag dbang bkra shis). Collected Topics by a " + "GCT: Ngak-wang-dra-shi (Ngag dbang bkra shis). Collected Topics by a " +
"Spiritual Son of Jam-yang-shay-ba (Sgo mang sras bsdus grwa).<p>\n" + "Spiritual Son of Jam-yang-shay-ba (Sgo mang sras bsdus grwa).<p>\n" +
"GD: Dreyfus, George. Ontology, Philosophy of Language, and Epistemology in " + "GD: Dreyfus, George. Ontology, Philosophy of Language, and Epistemology in " +
"Buddhist Tradition (Ph.d. dissertation. Religious Studies, University of " + "Buddhist Tradition (Ph.d. dissertation. Religious Studies, University of " +
"Virginia, Charlottesville,VA 1991).<p>\n" + "Virginia, Charlottesville,VA 1991).<p>\n" +
"Gon-chok: Gon-chok-jik-may-wang-bo (Dkon mchog \'jigs med dbang po). " + "Gon-chok: Gon-chok-jik-may-wang-bo (Dkon mchog \'jigs med dbang po). " +
"Precious Garland of Tenets (Grub mtha\' rin chen phreng ba).<p>\n" + "Precious Garland of Tenets (Grub mtha\' rin chen phreng ba).<p>\n" +
"Jang.: Jang-gya (Lcang skya rol pa\'i rdo rje). " + "Jang.: Jang-gya (Lcang skya rol pa\'i rdo rje). " +
"Presentation of Tenets (Lcang skya grub mtha').<p>\n" + "Presentation of Tenets (Lcang skya grub mtha').<p>\n" +
"JKA: ??? {PH: see mngon sum (definition) } <p>\n" + "JKA: ??? {PH: see mngon sum (definition) } <p>\n" +
"KS: Khetsun Sangpo, Biographical Dictionary of Tibet and Tibetan Buddhism. " + "KS: Khetsun Sangpo, Biographical Dictionary of Tibet and Tibetan Buddhism. " +
"(LTWA: Dharamsala, HP)<p>\n" + "(LTWA: Dharamsala, HP)<p>\n" +
"L: Lamotte, Etienne. Samdhinirmocana-sutra " + "L: Lamotte, Etienne. Samdhinirmocana-sutra " +
"(Louvain: Universite de Louvain, 1935).<p>\n" + "(Louvain: Universite de Louvain, 1935).<p>\n" +
"LAK: Jam-bel-sam-pel (\'Jam dpal bsam phel). Presentation of Awareness and " + "LAK: Jam-bel-sam-pel (\'Jam dpal bsam phel). Presentation of Awareness and " +
"Knowledge (Blo rig gi rnam bzhag).<p>\n" + "Knowledge (Blo rig gi rnam bzhag).<p>\n" +
"Lati: Oral commentary by Lati Rinbochay.<p>\n" + "Lati: Oral commentary by Lati Rinbochay.<p>\n" +
"LCh: Chandra, Lokesh. Tibetan-Sanskrit Dictionary (New Delhi, 1987).<p>\n" + "LCh: Chandra, Lokesh. Tibetan-Sanskrit Dictionary (New Delhi, 1987).<p>\n" +
"LG: Losang Gyatso\'s Blo rig.<p>\n" + "LG: Losang Gyatso\'s Blo rig.<p>\n" +
"LM: ??? {PH: see skye bu chung ngu ... }<p>\n" + "LM: ??? {PH: see skye bu chung ngu ... }<p>\n" +
"LR: Hopkins, Jeffrey. Glossary for Gsung rab kun gyi snying po lam rim gyi " + "LR: Hopkins, Jeffrey. Glossary for Gsung rab kun gyi snying po lam rim gyi " +
"gtso bo rnam pa gsung gi khrid yid gzhan phan snying po (by Panchen Lama IV).<p>\n" + "gtso bo rnam pa gsung gi khrid yid gzhan phan snying po (by Panchen Lama IV).<p>\n" +
"LSR: Tsul-trim-nam-gyel (Tshul khrims rnam rgyal). Presentation of Signs " + "LSR: Tsul-trim-nam-gyel (Tshul khrims rnam rgyal). Presentation of Signs " +
"and Reasonings (Rtags rigs kyi rnam bzhag).<p>\n" + "and Reasonings (Rtags rigs kyi rnam bzhag).<p>\n" +
"LWT: Lo-sang-gon-chok (Blo bzang dkon mchog). Word Commentary on the Root " + "LWT: Lo-sang-gon-chok (Blo bzang dkon mchog). Word Commentary on the Root " +
"Text of [Jam-yang-shay-ba\'s] \"Tenets\".<p>\n" + "Text of [Jam-yang-shay-ba\'s] \"Tenets\".<p>\n" +
"ME: Hopkins, Jeffrey. Meditation on Emptiness (London, Wisdom, 1983).<p>\n" + "ME: Hopkins, Jeffrey. Meditation on Emptiness (London, Wisdom, 1983).<p>\n" +
"MGP: ??? {PH: see bkag (examples) }<p>\n" + "MGP: ??? {PH: see bkag (examples) }<p>\n" +
"MSA: Nagao, Gadjin. Index to the Mahayanasutralankara (Tokyo: Nippon " + "MSA: Nagao, Gadjin. Index to the Mahayanasutralankara (Tokyo: Nippon " +
"Gakujutsu Shinkvo-kai, 1958).<p>\n" + "Gakujutsu Shinkvo-kai, 1958).<p>\n" +
"MSI: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Middling Exposition of " + "MSI: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Middling Exposition of " +
"Special Insight (Lhag mthong \'bring).<p>\n" + "Special Insight (Lhag mthong \'bring).<p>\n" +
"MV: Nagao, Gadjin. Index to the Madhyanta-vibhaga (Tokyo: 1961).<p>\n" + "MV: Nagao, Gadjin. Index to the Madhyanta-vibhaga (Tokyo: 1961).<p>\n" +
"N: Zuiryu NAKAMURA. Index to the Ratnagotravibhaga-mahayanottaratantra-sastra " + "N: Zuiryu NAKAMURA. Index to the Ratnagotravibhaga-mahayanottaratantra-sastra " +
"(Tokyo, 1961).<p>\n" + "(Tokyo, 1961).<p>\n" +
"P: Peking edition of the Tripitaka.<p>\n" + "P: Peking edition of the Tripitaka.<p>\n" +
"PGP: Lo-sang-da-yang (Blo bzang rta dbyangs). Presentation of the Grounds " + "PGP: Lo-sang-da-yang (Blo bzang rta dbyangs). Presentation of the Grounds " +
"and Paths in Prasangika (Thal \'gyur pa\'i sa lam).<p>\n" + "and Paths in Prasangika (Thal \'gyur pa\'i sa lam).<p>\n" +
"PP: Candrakirti. Prasannapada.<p>\n" + "PP: Candrakirti. Prasannapada.<p>\n" +
"S: Samdhinirmocana-sutra (Tok Palace version, 160 pp., Leh, Ladakh: Shesrig " + "S: Samdhinirmocana-sutra (Tok Palace version, 160 pp., Leh, Ladakh: Shesrig " +
"Dpemzod, 1975-1980, vol. ja).<p>\n" + "Dpemzod, 1975-1980, vol. ja).<p>\n" +
"TAK: Pur-bu-jok (Phur bu lcog). Explanation of the Presentation of Objects " + "TAK: Pur-bu-jok (Phur bu lcog). Explanation of the Presentation of Objects " +
"and Object-Possessors as Well as Awareness and Knowledge (Yul dang yul can " + "and Object-Possessors as Well as Awareness and Knowledge (Yul dang yul can " +
"dang blo rig).<p>\n" + "dang blo rig).<p>\n" +
"TCT: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics (Yongs " + "TCT: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics (Yongs " +
"\'dzin bsdus grwa).<p>\n" + "\'dzin bsdus grwa).<p>\n" +
"TGP: Nga-wang-bel-den (Ngag dbang dpal ldan). Treatise Illuminating the " + "TGP: Nga-wang-bel-den (Ngag dbang dpal ldan). Treatise Illuminating the " +
"Presentation of the Four Great Secret Tantra Sets (Sngags kyi sa lam).<p>\n" + "Presentation of the Four Great Secret Tantra Sets (Sngags kyi sa lam).<p>\n" +
"TN: Vasubandhu. Trisvabhavanirdesha.<p>\n" + "TN: Vasubandhu. Trisvabhavanirdesha.<p>\n" +
"VM: Bu-don-rin-chen-drup (bu ston rin chen grub), The Practice of " + "VM: Bu-don-rin-chen-drup (bu ston rin chen grub), The Practice of " +
"(Jnandagarbha\'s) \"The Rite of the Vajra-Element Mandala: The Source of All " + "(Jnandagarbha\'s) \"The Rite of the Vajra-Element Mandala: The Source of All " +
"Vajras\": A Precious Enhancer of Thought (rDo rje dbyings kyi dkyil \'khor gyi " + "Vajras\": A Precious Enhancer of Thought (rDo rje dbyings kyi dkyil \'khor gyi " +
"cho ga rdo rje thams cad \'byung ba zhes bya ba\'i lag len rin chen bsam \'phel), " + "cho ga rdo rje thams cad \'byung ba zhes bya ba\'i lag len rin chen bsam \'phel), " +
"in Collected Works, Part 12 na. Lhasa: Zhol Printing House, 1990.<p>\n" + "in Collected Works, Part 12 na. Lhasa: Zhol Printing House, 1990.<p>\n" +
"Y: Susumi YAMAGUCHI.Index to the Prasannapada Madhyamakavrtti. " + "Y: Susumi YAMAGUCHI.Index to the Prasannapada Madhyamakavrtti. " +
"(Kyoto: Heirakuji-Shoten, 1974).<p>\n" + "(Kyoto: Heirakuji-Shoten, 1974).<p>\n" +
"YT: Oral commentary by Yeshi Thupten."; "YT: Oral commentary by Yeshi Thupten.";
protected SimplifiedLinkedList wordList; protected SimplifiedLinkedList wordList;
public TibetanScanner() public TibetanScanner()
{ {
wordList = new SimplifiedLinkedList(); wordList = new SimplifiedLinkedList();
} }
public void clearTokens() public void clearTokens()
{ {
wordList = new SimplifiedLinkedList(); wordList = new SimplifiedLinkedList();
} }
public Token[] getTokenArray() public Token[] getTokenArray()
{ {
int n=wordList.size(); int n=wordList.size();
if (n==0) return null; if (n==0) return null;
Token token[] = new Token[n]; Token token[] = new Token[n];
SimplifiedListIterator li = wordList.listIterator(); SimplifiedListIterator li = wordList.listIterator();
while(li.hasNext()) while(li.hasNext())
token[--n] = (Token)li.next(); token[--n] = (Token)li.next();
return token; return token;
} }
public SimplifiedLinkedList getTokenLinkedList() public SimplifiedLinkedList getTokenLinkedList()
{ {
return wordList; return wordList;
} }
public Word[] getWordArray() public Word[] getWordArray()
{ {
return getWordArray(true); return getWordArray(true);
} }
public Word[] getWordArray(boolean includeRepeated) public Word[] getWordArray(boolean includeRepeated)
{ {
Token token; Token token;
Word array[], word; Word array[], word;
int n=0; int n=0;
SimplifiedListIterator li = wordList.listIterator(); SimplifiedListIterator li = wordList.listIterator();
SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList(); SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList();
while(li.hasNext()) while(li.hasNext())
{ {
token = (Token) li.next(); token = (Token) li.next();
if (token instanceof Word) if (token instanceof Word)
{ {
ll.addLast(token); ll.addLast(token);
} }
} }
if (includeRepeated) if (includeRepeated)
{ {
n = ll.size(); n = ll.size();
if (n==0) return null; if (n==0) return null;
array = new Word[n]; array = new Word[n];
li = ll.listIterator(); li = ll.listIterator();
n=0; n=0;
while (li.hasNext()) while (li.hasNext())
{ {
array[n++] = (Word) li.next(); array[n++] = (Word) li.next();
} }
} }
else else
{ {
ll2 = new SimplifiedLinkedList(); ll2 = new SimplifiedLinkedList();
li = ll.listIterator(); li = ll.listIterator();
while(li.hasNext()) while(li.hasNext())
{ {
word = (Word) li.next(); word = (Word) li.next();
if (!ll2.contains(word)) ll2.addLast(word); if (!ll2.contains(word)) ll2.addLast(word);
} }
n = ll2.size(); n = ll2.size();
if (n==0) return null; if (n==0) return null;
array = new Word[n]; array = new Word[n];
li = ll2.listIterator(); li = ll2.listIterator();
while (li.hasNext()) while (li.hasNext())
{ {
array[--n] = (Word) li.next(); array[--n] = (Word) li.next();
} }
} }
return array;
}
return array; public abstract void scanLine(String linea);
} public abstract void scanBody(String linea);
public abstract void finishUp();
public abstract void scanLine(String linea); public abstract BitDictionarySource getDictionarySource();
public abstract void scanBody(String linea); public abstract String[] getDictionaryDescriptions();
public abstract void finishUp(); public abstract void destroy();
public abstract BitDictionarySource getDictionarySource(); }
public abstract String[] getDictionaryDescriptions();
public abstract void destroy();
}