Fixed translation tool servlet issues: got rid of title, deleted white space, dealt with UTF8 better, etc.

This commit is contained in:
amontano 2009-03-03 05:23:49 +00:00
parent 835e74c0cd
commit 5a0e454a2e
6 changed files with 1574 additions and 1434 deletions

View file

@ -46,6 +46,7 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
private static final int WYLIE_TO_ACIP=2;
private static final int UNICODE_TO_WYLIE=3;
private static final int WYLIE_TO_UNICODE=4;
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
/** Converts from the Acip transliteration scheme to EWTS.*/
public static String acipToWylie(String acip)
@ -252,7 +253,19 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
nuevaPalabra = Manipulate.fixWazur(nuevaPalabra);
return nuevaPalabra;*/
}
/** Scans forward from <code>pos</code> for the first code point that lies inside
TIBETAN_UNICODE_RANGE (both bounds inclusive).
@param unicode text to scan
@param pos index at which the scan starts
@return index of the first code point in range, or -1 if none is found */
private static int getTibetanUnicodeStart(String unicode, int pos)
{
    final int limit = unicode.length();
    while (pos < limit)
    {
        int codePoint = unicode.codePointAt(pos);
        boolean inRange = codePoint >= TIBETAN_UNICODE_RANGE[0] && codePoint <= TIBETAN_UNICODE_RANGE[1];
        if (inRange) return pos;
        pos++;
    }
    return -1;
}
/** Scans forward from <code>pos</code> for the first code point that lies outside
TIBETAN_UNICODE_RANGE.
@param unicode text to scan
@param pos index at which the scan starts
@return index of the first code point out of range; when every remaining code
point is in range, the index at which the scan stopped */
private static int getTibetanUnicodeEnd(String unicode, int pos)
{
    final int limit = unicode.length();
    while (pos < limit)
    {
        int codePoint = unicode.codePointAt(pos);
        if (codePoint < TIBETAN_UNICODE_RANGE[0] || codePoint > TIBETAN_UNICODE_RANGE[1]) break;
        pos++;
    }
    return pos;
}
/** Converts Tibetan Unicode to EWTS. */
public static String unicodeToWylie(String unicode)
{
@ -261,9 +274,9 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
TibetanDocument tibDoc;
StringBuffer errors;
int posStart=0, posEnd;
while((posStart = Manipulate.getTibetanUnicodeStart(unicode, posStart))>=0)
while((posStart = getTibetanUnicodeStart(unicode, posStart))>=0)
{
posEnd = Manipulate.getTibetanUnicodeEnd(unicode, posStart+1);
posEnd = getTibetanUnicodeEnd(unicode, posStart+1);
startString = unicode.substring(0, posStart);
tibetanString = unicode.substring(posStart, posEnd);
endString = unicode.substring(posEnd);

View file

@ -1,463 +1,500 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
/** Miscellaneous static methods for the manipulation of Tibetan text.
@author Andr&eacute;s Montano Pellegrini
*/
public class Manipulate
{
private static String endOfParagraphMarks = "/;|!:^@#$%=";
private static String bracketMarks = "<>(){}[]";
private static String endOfSyllableMarks = " _\t";
private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks;
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
/* public static String[] parseFields (String s, char delimiter)
{
int pos;
String field;
SimplifiedLinkedList ll = new SimplifiedLinkedList();
while ((pos = s.indexOf(delimiter))>=0)
{
field = s.substring(0, pos).trim();
ll.addLast(field);
s = s.substring(pos+1);
}
ll.addLast(s.trim());
return ll.toStringArray();
}*/
public static int indexOfAnyChar(String str, String chars)
{
int i;
for (i=0; i<str.length(); i++)
{
if (chars.indexOf(str.charAt(i))>=0)
return i;
}
return -1;
}
public static int indexOfExtendedEndOfSyllableMark(String word)
{
return indexOfAnyChar(word, allStopMarkers);
}
public static int indexOfBracketMarks(String word)
{
return indexOfAnyChar(word, bracketMarks);
}
public static boolean isPunctuationMark(int ch)
{
return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0;
}
public static boolean isEndOfParagraphMark(int ch)
{
return endOfParagraphMarks.indexOf(ch)>=0;
}
public static boolean isEndOfSyllableMark(int ch)
{
return endOfSyllableMarks.indexOf(ch)>=0;
}
public static boolean isMeaningful(String s)
{
for (int i=0; i<s.length(); i++)
if (Character.isLetterOrDigit(s.charAt(i))) return true;
return false;
}
public static String replace(String linea, String origSub, String newSub)
{
int pos, lenOrig = origSub.length();
while ((pos = linea.indexOf(origSub))!=-1)
{
linea = linea.substring(0, pos).concat(newSub).concat(linea.substring(pos+lenOrig));
}
return linea;
}
public static String deleteSubstring (String string, int pos, int posEnd)
{
if (pos<0) return string;
if (pos==0)
{
return string.substring(posEnd).trim();
}
else
{
if (posEnd<string.length())
return string.substring(0, pos).concat(string.substring(posEnd)).trim();
else
return string.substring(0, pos).trim();
}
}
public static String replace(String string, int pos, int posEnd, String newSub)
{
if (pos<0) return string;
if (pos==0)
{
return newSub.concat(string.substring(posEnd)).trim();
}
else
{
if (posEnd<string.length())
return string.substring(0, pos).concat(newSub).concat(string.substring(posEnd)).trim();
else
return string.substring(0, pos).concat(newSub).trim();
}
}
public static String deleteSubstring (String string, String sub)
{
int pos = string.indexOf(sub), posEnd = pos + sub.length();
return deleteSubstring(string, pos, posEnd);
}
public static String[] addString(String array[], String s, int n)
{
int i;
String newArray[] = new String[array.length+1];
for (i=0; i<n; i++)
newArray[i] = array[i];
newArray[n] = s;
for (i=n+1; i<newArray.length; i++)
newArray[i] = array[i-1];
return newArray;
}
public static String[] deleteString(String array[], int n)
{
int i;
String newArray[] = new String[array.length-1];
for (i=0; i<n; i++)
newArray[i] = array[i];
for (i=n; i<newArray.length; i++)
newArray[i] = array[i+1];
return newArray;
}
public static boolean isVowel (char ch)
{
ch = Character.toLowerCase(ch);
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
}
/** If more than half of the first letters among the first are 10 characters
are uppercase assume its acip */
public static boolean guessIfAcip(String line)
{
char ch;
int letters=0, upperCase=0, i, n;
n = line.length();
if (n>10) n = 10;
for (i=0; i<n; i++)
{
ch = line.charAt(i);
if (Character.isLetter(ch))
{
letters++;
if (Character.isUpperCase(ch)) upperCase++;
}
}
if (letters==0 || upperCase==0) return false;
else return (letters / upperCase < 2);
}
public static boolean isTibetanUnicodeCharacter(char ch)
{
return ch>=0xF00 && ch<=0xFFF;
}
public static boolean isTibetanUnicodeLetter(char ch)
{
return ch>=0xF40 && ch<=0xFBC || ch>=0xF00 && ch<=0xF03;
}
public static boolean isTibetanUnicodeDigit(char ch)
{
return ch>=0xF20 && ch<=0xF33;
}
public static boolean guessIfUnicode(String line)
{
char ch;
int unicode=0, i, n;
n = line.length();
if (n>10) n = 10;
for (i=0; i<n; i++)
{
ch = line.charAt(i);
if (isTibetanUnicodeCharacter(ch)) unicode++;
}
if (n==0 || unicode==0) return false;
else return (n / unicode < 2);
}
public static String fixWazur(String linea)
{
int i;
for (i=1; i<linea.length(); i++)
{
if (linea.charAt(i)=='W')
{
if (Character.isLetter(linea.charAt(i-1)))
linea = linea.substring(0,i) + 'V' + linea.substring(i+1);
}
}
return linea;
}
/** Returns the base letter of a syllable. Does not include the vowel!
Ignoring cases for now.
@param sil syllable in roman transliteration
@return the base letter; "" when nothing precedes the first vowel; null when
the syllable contains no vowel at all (including the empty string) */
public static String getBaseLetter (String sil)
{
    sil = sil.toLowerCase();
    final int len = sil.length();
    int i=0;
    char ch, ch2;

    // Find the first vowel. The bounds check keeps vowel-less or empty input
    // from running off the end of the string.
    while (i<len && "aeiou".indexOf(sil.charAt(i))<0) i++;
    if (i>=len) return null;
    if (i==0) return "";

    i--;
    if (sil.charAt(i)=='-') i--;
    // a lone '-' before the vowel leaves no consonant to report
    if (i<0) return "";

    ch = sil.charAt(i);

    // check to see if it is a subscript (y, r, l, w)
    if (i>0)
    {
        switch (ch)
        {
            case 'r': case 'l': case 'w': i--;
            break;
            case 'y':
            ch2 = sil.charAt(i-1);
            switch (ch2)
            {
                case '.': return "y";
                case 'n': return "ny";
                default: i--;
            }
        }
    }
    if (i==0) return sil.substring(i,i+1);

    ch = sil.charAt(i);
    ch2 = sil.charAt(i-1);
    switch(ch)
    {
        case 'h':
        switch (ch2)
        {
            case 'k': case 'c': case 't': case 'p': case 'z':
            return sil.substring(i-1,i+1);
            case 's':
            if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh";
            else return "sh";
            default: return "h";
        }
        case 's':
        if (ch2=='t') return "ts";
        else return "s";
        case 'g':
        if (ch2=='n') return "ng";
        else return "g";
        case 'z':
        if (ch2=='d') return "dz";
        else return "z";
    }
    return sil.substring(i,i+1);
}
public static String deleteQuotes(String s)
{
int length = s.length(), pos;
if (length>2)
{
if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"'))
s = s.substring(1,length-1);
do
{
pos = s.indexOf("\"\"");
if (pos<0) break;
s = Manipulate.deleteSubstring(s, pos, pos+1);
} while (true);
}
return s;
}
/** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries
Takes the output of ConsoleScannerFilter
(in RY format), converts the Wylie to Acip
and displays the result in csv format.
arch-palabras es usado solo cuando deseamos las palabras cambiadas
a otro archivo.
public static void main (String[] args) throws Exception
{
String linea, palabra, definicion, nuevaPalabra;
int marker;
PrintWriter psPalabras = null;
BufferedReader keyb = new BufferedReader(new InputStreamReader(System.in));
if (args.length==1)
psPalabras = new PrintWriter(new FileOutputStream(args[0]));
while ((linea=keyb.readLine())!=null)
{
if (linea.trim().equals("")) continue;
marker = linea.indexOf('-');
if (marker<0) // linea tiene error
{
palabra = linea;
definicion = "";
}
else
{
palabra = linea.substring(0, marker).trim();
definicion = linea.substring(marker+1).trim();
}
nuevaPalabra = wylieToAcip(palabra);
if (psPalabras!=null)
psPalabras.println(nuevaPalabra);
else System.out.print(nuevaPalabra + '\t');
if (definicion.equals(""))
System.out.println(palabra);
else
System.out.println(palabra + '\t' + definicion);
}
if (psPalabras!=null) psPalabras.flush();
}*/
/** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */
public static String NCR2UnicodeString(String str)
{
StringBuffer ostr = new StringBuffer();
int i1=0;
int i2=0;
while(i2<str.length())
{
i1 = str.indexOf("&#",i2);
if (i1 == -1 ) {
ostr.append(str.substring(i2, str.length()));
break ;
}
ostr.append(str.substring(i2, i1));
i2 = str.indexOf(";", i1);
if (i2 == -1 ) {
ostr.append(str.substring(i1, str.length()));
break ;
}
String tok = str.substring(i1+2, i2);
try {
int radix = 10 ;
if (tok.trim().charAt(0) == 'x') {
radix = 16 ;
tok = tok.substring(1,tok.length());
}
ostr.append((char) Integer.parseInt(tok, radix));
} catch (NumberFormatException exp) {
ostr.append('?') ;
}
i2++ ;
}
return new String(ostr) ;
}
public static String UnicodeString2NCR(String str)
{
StringBuffer ncr = new StringBuffer();
int i;
for (i=0; i<str.length(); i++)
{
ncr.append("&#" + Integer.toString(str.charAt(i)) + ";");
}
return ncr.toString();
}
public static String unescape(String s) {
int i=0,len=s.length();
char c;
StringBuffer sb = new StringBuffer(len);
while (i<len) {
c = s.charAt(i++);
if (c=='\\') {
if (i<len) {
c = s.charAt(i++);
if (c=='u') {
c = (char) Integer.parseInt(s.substring(i,i+4),16);
i += 4;
} // add other cases here as desired...
}} // fall through: \ escapes itself, quotes any character but u
sb.append(c);
}
return sb.toString();
}
public static int getTibetanUnicodeStart(String unicode, int pos)
{
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)>=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos;
return -1;
}
public static int getTibetanUnicodeEnd(String unicode, int pos)
{
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)<TIBETAN_UNICODE_RANGE[0] || unicode.codePointAt(pos)>TIBETAN_UNICODE_RANGE[1]) return pos;
return pos;
}
}
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
/** Miscellaneous static methods for the manipulation of Tibetan text.
@author Andr&eacute;s Montano Pellegrini
*/
public class Manipulate
{
private static String endOfParagraphMarks = "/;|!:^@#$%=,";
private static String bracketMarks = "<>(){}[]";
private static String endOfSyllableMarks = " _\t";
private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks;
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
private static String JSON_ESCAPABLES = "\"\\/";
/* public static String[] parseFields (String s, char delimiter)
{
int pos;
String field;
SimplifiedLinkedList ll = new SimplifiedLinkedList();
while ((pos = s.indexOf(delimiter))>=0)
{
field = s.substring(0, pos).trim();
ll.addLast(field);
s = s.substring(pos+1);
}
ll.addLast(s.trim());
return ll.toStringArray();
}*/
/** Finds the first character of <code>str</code> that also occurs in <code>chars</code>.
@param str text to search
@param chars set of characters to look for
@return index of the first match in <code>str</code>, or -1 if there is none */
public static int indexOfAnyChar(String str, String chars)
{
    final int total = str.length();
    int idx = 0;
    while (idx < total)
    {
        if (chars.indexOf(str.charAt(idx)) != -1) return idx;
        idx++;
    }
    return -1;
}
/** Returns the index of the first character of <code>word</code> that appears in
<code>allStopMarkers</code> (end-of-syllable, end-of-paragraph and bracket marks
combined), or -1 if there is none. */
public static int indexOfExtendedEndOfSyllableMark(String word)
{
return indexOfAnyChar(word, allStopMarkers);
}
/** Returns the index of the first character of <code>word</code> that appears in
<code>bracketMarks</code>, or -1 if there is none. */
public static int indexOfBracketMarks(String word)
{
return indexOfAnyChar(word, bracketMarks);
}
/** Tells whether <code>ch</code> is one of the characters listed in
<code>endOfParagraphMarks</code> or in <code>bracketMarks</code>.
@param ch character to test, passed as an int */
public static boolean isPunctuationMark(int ch)
{
return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0;
}
/** Tells whether <code>ch</code> is one of the characters listed in
<code>endOfParagraphMarks</code>.
@param ch character to test, passed as an int */
public static boolean isEndOfParagraphMark(int ch)
{
return endOfParagraphMarks.indexOf(ch)>=0;
}
/** Tells whether <code>ch</code> is one of the characters listed in
<code>endOfSyllableMarks</code> (space, underscore or tab).
@param ch character to test, passed as an int */
public static boolean isEndOfSyllableMark(int ch)
{
return endOfSyllableMarks.indexOf(ch)>=0;
}
/** Tells whether <code>s</code> contains at least one letter or digit.
@param s text to inspect
@return true as soon as a letter or digit is found, false otherwise */
public static boolean isMeaningful(String s)
{
    final int total = s.length();
    int idx = 0;
    boolean found = false;
    while (!found && idx < total)
    {
        found = Character.isLetterOrDigit(s.charAt(idx));
        idx++;
    }
    return found;
}
/** Replaces every occurrence of <code>origSub</code> in <code>linea</code> with
<code>newSub</code>.
<p>The text is searched again from the start after each replacement, so
occurrences created by a replacement are replaced too (replacing two spaces
with one collapses any run of spaces to a single space). Two inputs that made
that rescan loop forever are handled separately: an empty <code>origSub</code>
returns <code>linea</code> unchanged, and a <code>newSub</code> that contains
<code>origSub</code> is applied in one left-to-right pass.
@param linea text to work on
@param origSub substring to look for
@param newSub replacement text
@return the text with the replacements applied */
public static String replace(String linea, String origSub, String newSub)
{
    final int lenOrig = origSub.length();
    int pos;

    // indexOf("") is always 0, so the rescan loop below would never end
    if (lenOrig==0) return linea;

    // every replacement would re-introduce origSub: do one forward pass instead
    if (newSub.indexOf(origSub)>=0)
    {
        StringBuffer result = new StringBuffer(linea.length());
        int from = 0;
        while ((pos = linea.indexOf(origSub, from))!=-1)
        {
            result.append(linea.substring(from, pos)).append(newSub);
            from = pos + lenOrig;
        }
        result.append(linea.substring(from));
        return result.toString();
    }

    while ((pos = linea.indexOf(origSub))!=-1)
    {
        linea = linea.substring(0, pos).concat(newSub).concat(linea.substring(pos+lenOrig));
    }
    return linea;
}
/** Removes the characters from <code>pos</code> (inclusive) to <code>posEnd</code>
(exclusive) and trims the result.
<p>A <code>posEnd</code> at or past the end of the string removes everything
from <code>pos</code> onwards. This now also holds when <code>pos</code> is 0,
which used to throw.
@param string text to work on
@param pos first index to remove; a negative value returns <code>string</code> untouched
@param posEnd index just after the last character to remove
@return the trimmed remainder */
public static String deleteSubstring (String string, int pos, int posEnd)
{
    if (pos<0) return string;
    String head = string.substring(0, pos);
    String tail = posEnd<string.length() ? string.substring(posEnd) : "";
    return head.concat(tail).trim();
}
/** Replaces the characters from <code>pos</code> (inclusive) to <code>posEnd</code>
(exclusive) with <code>newSub</code> and trims the result.
<p>A <code>posEnd</code> at or past the end of the string replaces everything
from <code>pos</code> onwards. This now also holds when <code>pos</code> is 0,
which used to throw.
@param string text to work on
@param pos first index to replace; a negative value returns <code>string</code> untouched
@param posEnd index just after the last character to replace
@param newSub replacement text
@return the trimmed result */
public static String replace(String string, int pos, int posEnd, String newSub)
{
    if (pos<0) return string;
    String head = string.substring(0, pos);
    String tail = posEnd<string.length() ? string.substring(posEnd) : "";
    return head.concat(newSub).concat(tail).trim();
}
/** Removes the first occurrence of <code>sub</code> from <code>string</code> by
delegating to the index-based overload, which also trims the result. When
<code>sub</code> is not found, <code>indexOf</code> yields -1 and the overload
returns <code>string</code> unchanged. */
public static String deleteSubstring (String string, String sub)
{
int pos = string.indexOf(sub), posEnd = pos + sub.length();
return deleteSubstring(string, pos, posEnd);
}
/** Returns a copy of <code>array</code>, one element longer, with <code>s</code>
inserted at index <code>n</code>. The input array is not modified.
@param array source array
@param s string to insert
@param n index the new string occupies in the result */
public static String[] addString(String array[], String s, int n)
{
    String[] grown = new String[array.length+1];
    System.arraycopy(array, 0, grown, 0, n);
    grown[n] = s;
    System.arraycopy(array, n, grown, n+1, array.length-n);
    return grown;
}
/** Returns a copy of <code>array</code>, one element shorter, without the element
at index <code>n</code>. The input array is not modified.
@param array source array
@param n index of the element to drop */
public static String[] deleteString(String array[], int n)
{
    String[] shrunk = new String[array.length-1];
    System.arraycopy(array, 0, shrunk, 0, n);
    System.arraycopy(array, n+1, shrunk, n, shrunk.length-n);
    return shrunk;
}
/** Tells whether <code>ch</code> is one of a, e, i, o, u, ignoring case. */
public static boolean isVowel (char ch)
{
    return "aeiou".indexOf(Character.toLowerCase(ch)) != -1;
}
/** Guesses whether <code>line</code> is ACIP: looks at the letters among the
first 10 characters and answers true when more than half of them are
uppercase.
@param line text to inspect
@return true if the sample looks like ACIP; false otherwise, including when
the sample holds no letters */
public static boolean guessIfAcip(String line)
{
    final int sample = Math.min(line.length(), 10);
    int letterCount = 0, upperCount = 0;
    for (int k = 0; k < sample; k++)
    {
        char current = line.charAt(k);
        if (!Character.isLetter(current)) continue;
        letterCount++;
        if (Character.isUpperCase(current)) upperCount++;
    }
    // "more than half" written without division; no uppercase letters means no
    return upperCount > 0 && letterCount < 2 * upperCount;
}
/** Tells whether <code>ch</code> falls in the range 0xF00 to 0xFFF, both inclusive. */
public static boolean isTibetanUnicodeCharacter(char ch)
{
    final boolean outside = ch < 0xF00 || ch > 0xFFF;
    return !outside;
}
/** Tells whether <code>ch</code> falls in 0xF00 to 0xF03 or in 0xF40 to 0xFBC
(bounds inclusive). */
public static boolean isTibetanUnicodeLetter(char ch)
{
    if (ch >= 0xF00 && ch <= 0xF03) return true;
    return ch >= 0xF40 && ch <= 0xFBC;
}
/** Tells whether <code>ch</code> falls in the range 0xF20 to 0xF33, both inclusive. */
public static boolean isTibetanUnicodeDigit(char ch)
{
    final int first = 0xF20, last = 0xF33;
    return first <= ch && ch <= last;
}
/** Guesses whether <code>line</code> is Tibetan Unicode: answers true when more
than half of the first 10 characters (all of them, if the line is shorter)
satisfy <code>isTibetanUnicodeCharacter</code>.
@param line text to inspect
@return true if the sample looks like Tibetan Unicode; false otherwise,
including for an empty line */
public static boolean guessIfUnicode(String line)
{
    final int sample = Math.min(line.length(), 10);
    int tibetanCount = 0;
    for (int k = 0; k < sample; k++)
    {
        if (isTibetanUnicodeCharacter(line.charAt(k))) tibetanCount++;
    }
    // "more than half" written without division; zero matches means no
    return tibetanCount > 0 && sample < 2 * tibetanCount;
}
/** Replaces each 'W' that directly follows a letter with 'V'. A 'W' in first
position, or after a non-letter, is left alone.
@param linea text to fix
@return the text with the substitutions applied */
public static String fixWazur(String linea)
{
    char[] chars = linea.toCharArray();
    for (int k = 1; k < chars.length; k++)
    {
        boolean afterLetter = Character.isLetter(chars[k-1]);
        if (afterLetter && chars[k] == 'W') chars[k] = 'V';
    }
    return new String(chars);
}
/** Returns the base letter of a syllable. Does not include the vowel!
Ignoring cases for now.
@param sil syllable in roman transliteration
@return the base letter; "" when no consonant can be found before the first
vowel; null when the syllable contains no vowel at all (including the empty
string) */
public static String getBaseLetter (String sil)
{
    sil = sil.toLowerCase();
    final int len = sil.length();
    int i=0;
    char ch, ch2;

    // Find the first vowel. Checking the bound before charAt keeps the empty
    // string from throwing.
    while (i<len && "aeiou".indexOf(sil.charAt(i))<0) i++;
    if (i>=len) return null;
    if (i==0) return "";

    i--;
    if (sil.charAt(i)=='-') i--;
    if (i>0 && sil.charAt(i)=='w') i--;
    // only a '-' preceded the vowel: no consonant to report
    if (i<0) return "";

    ch = sil.charAt(i);

    // check to see if it is a subscript (y, r, l)
    if (i>0)
    {
        switch (ch)
        {
            case 'r': case 'l': i--;
            break;
            case 'y':
            ch2 = sil.charAt(i-1);
            switch (ch2)
            {
                case '.': return "y";
                case 'n': return "ny";
                default: i--;
            }
        }
    }
    if (sil.charAt(i)=='+') i--;
    // a leading '+' has nothing in front of it
    if (i<0) return "";
    if (i==0) return sil.substring(i,i+1);

    ch = sil.charAt(i);
    ch2 = sil.charAt(i-1);
    switch(ch)
    {
        case 'h':
        switch (ch2)
        {
            case 'k': case 'c': case 't': case 'p': case 'z':
            return sil.substring(i-1,i+1);
            case '+':
            // "+h" at the very start has no letter before the '+'
            if (i<2) return "h";
            return sil.substring(i-2, i-1);
            case 's':
            if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh";
            else return "sh";
            default: return "h";
        }
        case 's':
        if (ch2=='t') return "ts";
        else return "s";
        case 'g':
        if (ch2=='n') return "ng";
        else return "g";
        case 'z':
        if (ch2=='d') return "dz";
        else return "z";
    }
    return sil.substring(i,i+1);
}
/** Undoes CSV-style quoting on strings longer than two characters: drops one
pair of enclosing double quotes if present, then collapses every doubled
double quote into a single one. The text is trimmed after each collapse.
Strings of two characters or fewer are returned as they are.
@param s text to unquote
@return the unquoted text */
public static String deleteQuotes(String s)
{
    if (s.length() <= 2) return s;

    final int last = s.length() - 1;
    if (s.charAt(0) == '\"' && s.charAt(last) == '\"')
        s = s.substring(1, last);

    int pos = s.indexOf("\"\"");
    while (pos >= 0)
    {
        // drop the first quote of the pair; trimming mirrors deleteSubstring
        s = s.substring(0, pos).concat(s.substring(pos + 1)).trim();
        pos = s.indexOf("\"\"");
    }
    return s;
}
/** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries
Takes the output of ConsoleScannerFilter
(in RY format), converts the Wylie to Acip
and displays the result in csv format.
arch-palabras es usado solo cuando deseamos las palabras cambiadas
a otro archivo.
public static void main (String[] args) throws Exception
{
String linea, palabra, definicion, nuevaPalabra;
int marker;
PrintWriter psPalabras = null;
BufferedReader keyb = new BufferedReader(new InputStreamReader(System.in));
if (args.length==1)
psPalabras = new PrintWriter(new FileOutputStream(args[0]));
while ((linea=keyb.readLine())!=null)
{
if (linea.trim().equals("")) continue;
marker = linea.indexOf('-');
if (marker<0) // linea tiene error
{
palabra = linea;
definicion = "";
}
else
{
palabra = linea.substring(0, marker).trim();
definicion = linea.substring(marker+1).trim();
}
nuevaPalabra = wylieToAcip(palabra);
if (psPalabras!=null)
psPalabras.println(nuevaPalabra);
else System.out.print(nuevaPalabra + '\t');
if (definicion.equals(""))
System.out.println(palabra);
else
System.out.println(palabra + '\t' + definicion);
}
if (psPalabras!=null) psPalabras.flush();
}*/
/** Converts numeric character references (&amp;#NNNN; decimal or &amp;#xHHHH;
hexadecimal) into the characters they stand for.
Adapted from http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html
<p>A reference that cannot be parsed (empty, non-numeric, or not a valid
Unicode code point) is replaced by '?'. An unterminated "&amp;#" is copied
through unchanged. Code points above 0xFFFF are emitted as surrogate pairs.
@param str text that may contain numeric character references
@return the text with the references decoded */
public static String NCR2UnicodeString(String str)
{
    StringBuffer ostr = new StringBuffer(str.length());
    int i1;
    int i2=0;

    while (i2<str.length())
    {
        i1 = str.indexOf("&#", i2);
        if (i1 == -1)
        {
            ostr.append(str.substring(i2));
            break;
        }
        ostr.append(str.substring(i2, i1));
        i2 = str.indexOf(";", i1);
        if (i2 == -1)
        {
            ostr.append(str.substring(i1));
            break;
        }

        // trim once so the radix test and the parse see the same text
        String tok = str.substring(i1+2, i2).trim();
        try
        {
            int radix = 10;
            if (tok.length()>0 && (tok.charAt(0)=='x' || tok.charAt(0)=='X'))
            {
                radix = 16;
                tok = tok.substring(1);
            }
            // an empty token makes parseInt throw, which lands in the catch below
            int codePoint = Integer.parseInt(tok, radix);
            if (!Character.isValidCodePoint(codePoint))
                throw new NumberFormatException(tok);
            ostr.appendCodePoint(codePoint);
        }
        catch (NumberFormatException exp)
        {
            ostr.append('?');
        }
        i2++;
    }
    return ostr.toString();
}
/** Encodes every character of <code>str</code> as a decimal numeric character
reference (&amp;#NNNN;). A surrogate pair is encoded as one reference for the
code point it represents, not as two references for its halves.
@param str text to encode
@return the encoded text; empty for empty input */
public static String UnicodeString2NCR(String str)
{
    StringBuffer ncr = new StringBuffer(str.length()*8);
    int i = 0;
    while (i<str.length())
    {
        int codePoint = str.codePointAt(i);
        ncr.append("&#").append(codePoint).append(';');
        // step over both halves of a surrogate pair
        i += Character.charCount(codePoint);
    }
    return ncr.toString();
}
/** Escapes <code>str</code> so it can be placed between the double quotes of a
JSON string literal.
<p>Double quote, backslash and slash get a backslash in front. Backspace, form
feed, newline, carriage return and tab use their two-character escapes. Any
other control character below 0x20 is written as a backslash, the letter u
and four hex digits, as JSON requires.
@param str raw text
@return the escaped text, without surrounding quotes */
public static String toJSON(String str)
{
    final int len = str.length();
    StringBuffer json = new StringBuffer(len+16);
    for (int i=0; i<len; i++)
    {
        char c = str.charAt(i);
        switch (c)
        {
            case '\"': case '\\': case '/':
                json.append('\\').append(c);
                break;
            case '\b': json.append("\\b"); break;
            case '\f': json.append("\\f"); break;
            case '\n': json.append("\\n"); break;
            case '\r': json.append("\\r"); break;
            case '\t': json.append("\\t"); break;
            default:
                if (c<0x20)
                {
                    // remaining control characters are not allowed raw in JSON
                    String hex = Integer.toHexString(c);
                    json.append('\\').append('u').append("0000".substring(hex.length())).append(hex);
                }
                else json.append(c);
        }
    }
    return json.toString();
}
/** Tells whether <code>str</code> contains at least one letter.
@param str text to inspect; may be null
@return true if a letter is found; false for null, empty or letter-free text */
public static boolean containsLetters(String str)
{
    if (str == null) return false;
    for (int k = 0, total = str.length(); k < total; k++)
    {
        if (Character.isLetter(str.charAt(k))) return true;
    }
    return false;
}
/** Removes backslash escaping. A backslash followed by the letter u and exactly
four hex digits becomes the character with that code. A backslash followed by
any other character yields that character (so a doubled backslash yields one
backslash). A backslash at the very end of the text is kept.
<p>If the letter u is not followed by four hex digits, the u is kept as a
literal and the following characters are read normally; this used to throw.
@param s escaped text
@return the unescaped text */
public static String unescape(String s) {
    int i=0, len=s.length();
    char c;
    StringBuffer sb = new StringBuffer(len);
    while (i<len) {
        c = s.charAt(i++);
        if (c=='\\' && i<len) {
            c = s.charAt(i++);
            if (c=='u' && i+4<=len) {
                // accumulate the four hex digits, giving up on the first bad one
                int code = 0;
                boolean valid = true;
                for (int k=i; k<i+4; k++) {
                    int digit = Character.digit(s.charAt(k), 16);
                    if (digit<0) {
                        valid = false;
                        break;
                    }
                    code = code*16 + digit;
                }
                if (valid) {
                    c = (char) code;
                    i += 4;
                }
            } // add other cases here as desired...
        } // fall through: \ escapes itself, quotes any character but u
        sb.append(c);
    }
    return sb.toString();
}
/** Scans forward from <code>pos</code> for the first code point that lies inside
TIBETAN_UNICODE_RANGE (3840 to 4095, both inclusive).
@param unicode text to scan
@param pos index at which the scan starts
@return index of the first code point in range, or -1 if none is found */
public static int getTibetanUnicodeStart(String unicode, int pos)
{
    final int limit = unicode.length();
    while (pos < limit)
    {
        int codePoint = unicode.codePointAt(pos);
        boolean inRange = codePoint >= TIBETAN_UNICODE_RANGE[0] && codePoint <= TIBETAN_UNICODE_RANGE[1];
        if (inRange) return pos;
        pos++;
    }
    return -1;
}
/** Scans forward from <code>pos</code> for the first code point that lies outside
TIBETAN_UNICODE_RANGE (3840 to 4095).
@param unicode text to scan
@param pos index at which the scan starts
@return index of the first code point out of range; when every remaining code
point is in range, the index at which the scan stopped */
public static int getTibetanUnicodeEnd(String unicode, int pos)
{
    final int limit = unicode.length();
    while (pos < limit)
    {
        int codePoint = unicode.codePointAt(pos);
        if (codePoint < TIBETAN_UNICODE_RANGE[0] || codePoint > TIBETAN_UNICODE_RANGE[1]) break;
        pos++;
    }
    return pos;
}
}

View file

@ -1,465 +1,507 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.io.PrintWriter;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.thdl.util.ThdlOptions;
/** Interface that provides access to an on-line dictionary through an HTML form;
Inputs Tibetan text (Roman script only) and displays the
words (Roman or Tibetan script) with their definitions.
Runs on the server and is called upon through an HTTP request directly
by the browser. Requires no additional software installed on the client.
@author Andr&eacute;s Montano Pellegrini
*/
public class OnLineScannerFilter extends HttpServlet
{
private final static String propertyFile = "dictionary";
private final static String dictNameProperty = "onlinescannerfilter.dict-file-name";
private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff";
private final static String moreLinksProperty = "onlinescannerfilter.links-to-more-stuff";
private final static String clearStr = "Clear";
private final static String buttonStr = "button";
private final static String scriptStr = "script";
private final static String tibetanStr = "tibetan";
ResourceBundle rb;
private TibetanScanner scanner;
private String dictionaries[];
private ScannerLogger sl;
/** Loads the "dictionary" resource bundle, opens the dictionary named by the
<code>onlinescannerfilter.dict-file-name</code> property and caches its
dictionary descriptions.
@throws IllegalStateException if the dictionary cannot be opened. The failure
is logged first and the original exception is kept as the cause. (This case
used to surface as a NullPointerException on the line that reads the
descriptions, which hid the real reason.) */
public OnLineScannerFilter() //throws Exception
{
    rb = ResourceBundle.getBundle(propertyFile);
    sl = new ScannerLogger();

    try
    {
        scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false);
    }
    catch (Exception e)
    {
        sl.writeLog("1\t1");
        sl.writeException(e);
        // scanner is still null here; fail with the real cause attached
        throw new IllegalStateException("Unable to open the dictionary for the on-line scanner", e);
    }
    dictionaries = scanner.getDictionaryDescriptions();
    sl.writeLog("2\t1");
}
/** Writes the translation-tool page: the HTML head, the optional THDL banner,
the form (script choice, dictionary check boxes, input text area) and, when
text was submitted, the scanned words and their definitions.
<p>Request parameters read: <code>thdlBanner</code>, <code>script</code>,
<code>dict0</code>..<code>dictN</code>, <code>button</code> and
<code>parrafo</code>.
<p>The submitted text is echoed back into the text area with its angle
brackets escaped, so it cannot close the text area and inject markup.
@param request the HTTP request
@param response the HTTP response the page is written to */
synchronized public void doGet(HttpServletRequest request,
        HttpServletResponse response) //throws IOException, ServletException
{
    String answer, parrafo = null, checkboxName;

    // if this line is included in the constructor, it works on the orion server but not on wyllie!
    ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
    ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);

    response.setContentType("text/html");
    PrintWriter out;
    sl.setUserIP(request.getRemoteAddr());

    try
    {
        out = response.getWriter();
    }
    catch (Exception e)
    {
        sl.writeLog("1\t1");
        sl.writeException(e);
        return;
    }

    BitDictionarySource ds=null;
    boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null);

    out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
    out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
    out.println("<head>");
    if (useTHDLBanner)
    {
        out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>");
        out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
        out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
        out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
    }
    else
        out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>");
    out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">");
    out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">");
    out.println(" <meta name=\"MSSmartTagsPreventParsing\" content=\"TRUE\">");

    answer = request.getParameter(scriptStr);

    /* script==null || makes default tibetan
    script!=null && makes default roman
    */
    wantsTibetan = (answer==null || answer.equals(tibetanStr));

    out.println("</head>");
    out.println("<body>");
    if (useTHDLBanner)
    {
        out.println("<script type=\"text/javascript\" src=\"http://www.thdl.org/thdl/scripts/banner.js\"></script>");
        out.println("<div id=\"sub_banner\">");
        out.println("<div id=\"search\">");
        out.println(" <form method=\"get\" action=\"http://www.google.com/u/thdl\">");
        out.println(" <p>");
        out.println(" <input type=\"text\" name=\"q\" id=\"q\" size=\"15\" maxlength=\"255\" value=\"\" />");
        out.println(" <input type=\"submit\" name=\"sa\" id=\"sa\" value=\"Search\"/>");
        out.println(" <input type=\"hidden\" name=\"hq\" id=\"hq\" value=\"inurl:orion.lib.virginia.edu\"/>");
        out.println(" </p>");
        out.println(" </form>");
        out.println(" </div>");
        out.println(" <div id=\"breadcrumbs\">");
        out.println(" <a href=\"/thdl/index.html\">Home</a> &gt; <a href=\"/thdl/reference/index.html\">Reference</a> &gt; Translation Tool");
        out.println(" </div>");
        out.println("</div><!--END sub_banner-->");
        out.println("<div id=\"main\">");
    }
    out.println("<h3 align=\"center\">The Online Tibetan to English Translation/Dictionary Tool</h3>");

    try
    {
        out.println(rb.getString(otherLinksProperty));
    }
    catch (MissingResourceException e)
    {
        // do nothing
    }

    if (useTHDLBanner)
    {
        out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter?thdlBanner=on\" method=POST>");
    }
    else
    {
        out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter\" method=POST>");
    }
    out.println("<table border=\"0\" width=\"100%\">");
    out.println(" <tr>");
    out.println(" <td width=\"25%\">");
    out.println(" <p>Display results in:</td>");
    out.println(" <td width=\"75%\">");
    out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" ");
    if (wantsTibetan) out.println("checked ");
    out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://www.thdl.org/xml/show.php?xml=/tools/tibfonts.xml&l=uva10928423419921\" target=\"_blank\">Tibetan Machine Uni font</a>)<br/>");
    out.println(" <input type=\"radio\" value=\"roman\" ");
    if (!wantsTibetan) out.println("checked ");
    out.println("name=\"" + scriptStr + "\">Roman script</td>");
    out.println(" </tr>");
    out.println("</table>");

    if (dictionaries!=null)
    {
        int i;
        ds = scanner.getDictionarySource();
        ds.reset();
        checkedDicts = new boolean[dictionaries.length];
        out.println("<p>Search in dictionaries: ");

        for (i=0; i<dictionaries.length; i++)
        {
            checkboxName = "dict"+ i;
            checkedDicts[i] = (request.getParameter(checkboxName)!=null);
        }

        allUnchecked=true;
        for (i=0; i<dictionaries.length; i++)
        {
            if(checkedDicts[i])
            {
                allUnchecked=false;
                break;
            }
        }

        // nothing ticked (e.g. first visit): search every dictionary
        if (allUnchecked)
        {
            for (i=0; i<dictionaries.length; i++)
                checkedDicts[i] = true;
        }

        for (i=0; i<dictionaries.length; i++)
        {
            checkboxName = "dict"+ i;
            out.print("<input type=\"checkbox\" name=\"" + checkboxName +"\" value=\""+ checkboxName +"\"");
            if (checkedDicts[i])
            {
                out.print(" checked");
                ds.add(i);
            }
            if (dictionaries[i]!=null)
                out.print(">" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ")&nbsp;&nbsp;&nbsp;");
            else
                out.print(">" + DictionarySource.defTags[i] + "&nbsp;&nbsp;&nbsp;");
        }
    }
    // fix for updates
    else ds = BitDictionarySource.getAllDictionaries();

    out.println("</p>");
    out.println("<table border=\"0\" width=\"100%\">");
    out.println(" <tr>");
    out.println(" <td width=\"35%\">");
    out.println(" <p><strong>Input text:</strong></p>");
    out.println(" </td>");
    out.println(" <td width=\"65%\">");
    out.println(" <p> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></p>");
    out.println(" </td>");
    out.println(" </tr>");
    out.println("</table>");

    out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
    if (wantsTibetan) out.print(" class=\"tib\"");
    out.println(">");

    // Paragraph should be empty if the user just clicked the clear button
    answer = request.getParameter(buttonStr);
    if (!clearStr.equals(answer))
    {
        parrafo = request.getParameter("parrafo");
        // user-supplied text: neutralise markup before echoing it back
        if (parrafo!=null) out.print(escapeForTextarea(parrafo));
    }

    out.println("</textarea>");
    out.println("</form>");

    try
    {
        out.println(rb.getString(moreLinksProperty));
    }
    catch (MissingResourceException e)
    {
        // do nothing
    }

    if (parrafo != null)
    {
        sl.writeLog("4\t1");
        if (ds!=null && !ds.isEmpty())
            desglosar(parrafo, out, wantsTibetan);
    }
    else sl.writeLog("3\t1");

    out.println(TibetanScanner.copyrightHTML);
    if (useTHDLBanner) out.println("</div><!--END main-->");
    out.println("</body>");
    out.println("</html>");
}

/** Escapes angle brackets so echoed user text cannot close the text area or
open a tag. Ampersands are left alone on purpose: escaping them would turn a
submitted numeric character reference into literal text in the text area. */
private static String escapeForTextarea(String text)
{
    StringBuffer safe = new StringBuffer(text.length()+16);
    for (int i=0; i<text.length(); i++)
    {
        char c = text.charAt(i);
        switch (c)
        {
            case '<': safe.append("&lt;"); break;
            case '>': safe.append("&gt;"); break;
            default: safe.append(c);
        }
    }
    return safe.toString();
}
/** Treats a POST request exactly like a GET request by handing it to
    {@link #doGet}.
    @param request the incoming HTTP request
    @param response the response the page is written to
*/
public void doPost(HttpServletRequest request, HttpServletResponse response) //throws IOException, ServletException
{
    this.doGet(request, response);
}
/** Scans the submitted text and writes the result to <code>pw</code>: first the
    text itself (via printText) and then the table of definitions (via
    printAllDefs). An empty string produces no output at all.
    @param in text to scan; it is passed through Manipulate.NCR2UnicodeString
    and, when guessed to be Unicode or ACIP, converted to Wylie before scanning
    @param pw writer receiving the generated HTML
    @param tibetan forwarded unchanged to printText and printAllDefs
*/
synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan)
{
    if (in.equals("")) return;

    // Start from a clean token list; it is cleared again once the output is written.
    scanner.clearTokens();

    String text = Manipulate.NCR2UnicodeString(in);
    if (Manipulate.guessIfUnicode(text))
    {
        text = BasicTibetanTranscriptionConverter.unicodeToWylie(text);
    }
    else if (Manipulate.guessIfAcip(text))
    {
        text = BasicTibetanTranscriptionConverter.acipToWylie(text);
    }

    scanner.scanBody(text);
    scanner.finishUp();
    printText(pw, tibetan);
    printAllDefs(pw, tibetan);
    scanner.clearTokens();
}
/** Writes the scanned tokens as HTML paragraphs. Words are written through
    SwingWord.getLink; punctuation marks are written as their first character
    followed by a space, with a line break starting a new paragraph and
    '&lt;' / '&gt;' replaced by their entities. Other tokens are ignored.
    @param pw writer receiving the HTML
    @param tibetan forwarded to SwingWord.getLink
*/
public void printText(PrintWriter pw, boolean tibetan)
{
    Token tokens[] = scanner.getTokenArray();
    if (tokens == null) return;

    pw.print("<p>");
    for (int idx = 0; idx < tokens.length; idx++)
    {
        Token token = tokens[idx];
        if (token instanceof Word)
        {
            pw.print(new SwingWord((Word) token).getLink(tibetan));
            continue;
        }
        if (!(token instanceof PunctuationMark)) continue;

        char mark = token.toString().charAt(0);
        if (mark == '\n')
        {
            // A line break in the input closes the current paragraph and opens a new one.
            pw.println("</p>");
            pw.print("<p>");
        }
        else if (mark == '<')
        {
            pw.print("&lt; ");
        }
        else if (mark == '>')
        {
            pw.print("&gt; ");
        }
        else
        {
            pw.print(mark + " ");
        }
    }
    pw.println("</p>");
}
/** Writes an HTML table with one group of rows per scanned word: the word's
    bookmark in a cell spanning all of its definitions, and one row per
    definition holding the dictionary tag and the definition text.
    A failure while writing one word is logged and the remaining words are
    still written.
    @param pw writer receiving the HTML
    @param tibetan selects the "tib" CSS class and is forwarded to
    SwingWord.getBookmark
*/
public void printAllDefs(PrintWriter pw, boolean tibetan)
{
    Word words[] = scanner.getWordArray(false);
    if (words == null)
        return;
    pw.println("<table border=\"1\" width=\"100%\">");
    for (int j = 0; j < words.length; j++)
    {
        // Declared per word so that the error handler below never sees the
        // previous word (or null) when the SwingWord constructor itself fails.
        SwingWord word = null;
        try
        {
            word = new SwingWord(words[j]);
            Definitions defs = word.getDefs();
            DictionarySource ds = defs.getDictionarySource();
            // Reset per word: a source left over from the previous word must
            // never be used to tag this word's definitions.
            ByteDictionarySource sourceb = null;
            int k = 0;
            String tag;
            pw.println(" <tr>");
            if (ds == null)
            {
                tag = "&nbsp;";
            }
            else if (FileSyllableListTree.versionNumber==2)
            {
                tag = ds.getTag(0);
            }
            else
            {
                sourceb = (ByteDictionarySource) ds;
                while (sourceb.isEmpty(k)) k++;
                tag = sourceb.getTag(k);
                k++;
            }
            pw.print(" <td width=\"20%\" rowspan=\"" + defs.def.length
                + "\" valign=\"top\"");
            if (tibetan) pw.print(" class=\"tib\"");
            pw.println(">" + word.getBookmark(tibetan) + "</td>");
            pw.println(" <td width=\"12%\">" + tag + "</td>");
            pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>");
            pw.println(" </tr>");
            for (int i = 1; i < defs.def.length; i++)
            {
                pw.println(" <tr>");
                if (ds == null)
                {
                    // No dictionary source: keep the tag cell empty instead of failing.
                    tag = "&nbsp;";
                }
                else if (sourceb == null)
                {
                    tag = ds.getTag(i);
                }
                else
                {
                    while (sourceb.isEmpty(k)) k++;
                    tag = sourceb.getTag(k);
                    k++;
                }
                pw.println(" <td width=\"12%\">" + tag + "</td>");
                pw.println(" <td width=\"68%\">" + defs.def[i] + "</td>");
                pw.println(" </tr>");
            }
        }
        catch (Exception e)
        {
            // The word is only named in the log when it was actually built.
            if (word != null) sl.writeLog("1\t1\t" + word.getWylie());
            else sl.writeLog("1\t1");
            sl.writeException(e);
        }
    }
    pw.println("</table>");
}
/** Shuts the servlet down: runs the superclass clean-up, clears the logger's
    user IP, writes the "5\t1" log entry and finally destroys the scanner. */
public void destroy()
{
    super.destroy();

    this.sl.setUserIP(null);
    this.sl.writeLog("5\t1");

    this.scanner.destroy();
}
}
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.io.PrintWriter;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.thdl.util.ThdlOptions;
/** Interface to provide access to an on-line dictionary through a form in html;
Inputs Tibetan text (Roman script only) and displays the
words (Roman or Tibetan script) with their definitions.
Runs on the server and is called upon through an HTTP request directly
by the browser. Requires no additional software installed on the client.
@author Andr&eacute;s Montano Pellegrini
*/
public class OnLineScannerFilter extends HttpServlet
{
private final static String propertyFile = "dictionary";
private final static String dictNameProperty = "onlinescannerfilter.dict-file-name";
private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff";
private final static String moreLinksProperty = "onlinescannerfilter.links-to-more-stuff";
private final static String smallerLinksProperty = "onlinescannerfilter.links-to-smaller-stuff";
private final static String clearStr = "Clear";
private final static String buttonStr = "button";
private final static String scriptStr = "script";
private final static String tibetanStr = "tibetan";
ResourceBundle rb;
private TibetanScanner scanner;
private String dictionaries[];
private ScannerLogger sl;
/** Creates the servlet: forces headless AWT, loads the "dictionary" resource
    bundle, creates the logger and opens the dictionary named by the
    bundle's dict-file-name property.
    NOTE(review): a failure to open the dictionary is only logged, so
    <code>scanner</code> stays null and the getDictionaryDescriptions() call
    below then throws a NullPointerException.
*/
public OnLineScannerFilter() //throws Exception
{
    System.setProperty("java.awt.headless","true");
    this.rb = ResourceBundle.getBundle(propertyFile);
    this.sl = new ScannerLogger();

    try
    {
        String dictFileName = this.rb.getString(dictNameProperty);
        this.scanner = new LocalTibetanScanner(dictFileName, false);
    }
    catch (Exception e)
    {
        this.sl.writeLog("1\t1");
        this.sl.writeException(e);
    }

    this.dictionaries = this.scanner.getDictionaryDescriptions();
    this.sl.writeLog("2\t1");
}
/** Renders the translation tool page: the HTML head, the optional THDL banner
    (request parameter "thdlBanner"), the form with the script choice, the
    dictionary check-boxes, the buttons and the text area, and - when text
    was submitted - the scanned text followed by its definitions.
    The submitted text is ignored when the "Clear" button was pressed.
    @param request the HTTP request; its parameters select script,
    dictionaries and the text to translate
    @param response the response the HTML page is written to
*/
synchronized public void doGet(HttpServletRequest request,
    HttpServletResponse response) //throws IOException, ServletException
{
    String answer, parrafo = null, checkboxName;
    try
    {
        request.setCharacterEncoding("UTF8");
    }
    catch(Exception e)
    {
        // Best effort: keep the container's default request encoding.
    }
    // if this line is included in the constructor, it works on the orion server but not on wyllie!
    ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
    ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
    // The charset must be declared before getWriter() is called; without it
    // the writer does not use the utf-8 encoding that the page's meta tag
    // announces.
    response.setContentType("text/html; charset=UTF-8");
    PrintWriter out;
    sl.setUserIP(request.getRemoteAddr());
    try
    {
        out = response.getWriter();
    }
    catch (Exception e)
    {
        sl.writeLog("1\t1");
        sl.writeException(e);
        return;
    }
    BitDictionarySource ds=null;
    boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null);

    // ---- document head ----
    out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
    out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
    out.println("<head>");
    out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
    if (useTHDLBanner)
    {
        out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Dictionary and Translation Tool</title>");
        out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
        out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
    }
    else
    {
        out.println(" <title>The Online Tibetan to English Dictionary and Translation Tool</title>");
        out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"stylesheets/base.css\"/>");
    }
    out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">");
    out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">");
    out.println(" <meta name=\"MSSmartTagsPreventParsing\" content=\"TRUE\">");
    answer = request.getParameter(scriptStr);
    // Tibetan script is the default when no script was chosen.
    wantsTibetan = (answer==null || answer.equals(tibetanStr));
    out.println("</head>");

    // ---- body: banner and links ----
    out.println("<body>");
    if (useTHDLBanner)
    {
        out.println("<script type=\"text/javascript\" src=\"http://www.thdl.org/thdl/scripts/banner.js\"></script>");
        out.println("<div id=\"sub_banner\">");
        out.println("<div id=\"search\">");
        out.println(" <form method=\"get\" action=\"http://www.google.com/u/thdl\">");
        out.println(" <p>");
        out.println(" <input type=\"text\" name=\"q\" id=\"q\" size=\"15\" maxlength=\"255\" value=\"\" />");
        out.println(" <input type=\"submit\" name=\"sa\" id=\"sa\" value=\"Search\"/>");
        out.println(" <input type=\"hidden\" name=\"hq\" id=\"hq\" value=\"inurl:orion.lib.virginia.edu\"/>");
        out.println(" </p>");
        out.println(" </form>");
        out.println(" </div>");
        out.println(" <div id=\"breadcrumbs\">");
        out.println(" <a href=\"/thdl/index.html\">Home</a> &gt; <a href=\"/thdl/reference/index.html\">Reference</a> &gt; Translation Tool");
        out.println(" </div>");
        out.println("</div><!--END sub_banner-->");
        out.println("<div id=\"main\">");
    }
    try
    {
        out.println(rb.getString(otherLinksProperty));
    }
    catch (MissingResourceException e)
    {
        // The links are optional; nothing is printed when the property is absent.
    }

    // ---- form: script choice ----
    if (useTHDLBanner)
    {
        out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter?thdlBanner=on\" method=POST>");
    }
    else
    {
        out.println("<form action=\"org.thdl.tib.scanner.OnLineScannerFilter\" method=POST>");
    }
    out.println("<table border=\"0\" width=\"100%\">");
    out.println(" <tr>");
    out.println(" <td width=\"18%\" align=\"left\"><strong>Display results in:</strong></td>");
    out.println(" <td width=\"41%\" align=\"right\">");
    out.println(" <input type=\"radio\" value=\"" + tibetanStr + "\" ");
    if (wantsTibetan) out.println("checked ");
    out.println("name=\"" + scriptStr + "\">Tibetan script (<a href=\"http://www.thlib.org/tools/#wiki=/access/wiki/site/26a34146-33a6-48ce-001e-f16ce7908a6a/tibetan%20machine%20uni.html\" target=\"_top\">Tibetan Machine Uni</a> font)</td>");
    out.println(" <td width=\"16%\" align=\"left\">");
    out.println(" <input type=\"radio\" value=\"roman\" ");
    if (!wantsTibetan) out.println("checked ");
    out.println("name=\"" + scriptStr + "\">Roman script</td>");
    out.println(" <td width=\"25%\" align=\"right\">");
    // The ampersand in the link text is written as an entity to keep the markup valid.
    out.println("<a href=\"http://www.thlib.org/tools/#wiki=/access/wiki/site/c06fa8cf-c49c-4ebc-007f-482de5382105/tibetan%20translation%20tool.html\" target=\"_top\">Help &amp; Offline Installation</a></td>");
    out.println(" </tr>");

    // ---- form: dictionary selection ----
    if (dictionaries!=null)
    {
        int i;
        ds = scanner.getDictionarySource();
        ds.reset();
        checkedDicts = new boolean[dictionaries.length];
        out.println("<tr><td colspan=\"4\"><strong>Search in dictionaries: </strong>");
        allUnchecked=true;
        for (i=0; i<dictionaries.length; i++)
        {
            checkboxName = "dict"+ i;
            checkedDicts[i] = (request.getParameter(checkboxName)!=null);
            if (checkedDicts[i]) allUnchecked=false;
        }
        // No box ticked (e.g. first visit) means "search every dictionary".
        if (allUnchecked)
        {
            for (i=0; i<dictionaries.length; i++)
                checkedDicts[i] = true;
        }
        for (i=0; i<dictionaries.length; i++)
        {
            checkboxName = "dict"+ i;
            out.print("<input type=\"checkbox\" name=\"" + checkboxName +"\" value=\""+ checkboxName +"\"");
            if (checkedDicts[i])
            {
                out.print(" checked");
                ds.add(i);
            }
            if (dictionaries[i]!=null)
                out.print(">" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ")&nbsp;&nbsp;&nbsp;");
            else
                out.print(">" + DictionarySource.defTags[i] + "&nbsp;&nbsp;&nbsp;");
        }
        out.println(" </td></tr>");
    }
    // fix for updates
    else ds = BitDictionarySource.getAllDictionaries();

    // ---- form: buttons ----
    out.println(" <tr>");
    out.println(" <td><strong>Input text:</strong></td>");
    out.println(" <td><input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></td>");
    out.println(" <td colspan=\"2\">&nbsp;</td>");
    out.println(" </tr>");
    out.println("</table>");

    // ---- form: text area ----
    // Paragraph should be empty if the user just clicked the clear button
    answer = request.getParameter(buttonStr);
    String smallerLinks=null;
    if (answer == null || !answer.equals(clearStr))
    {
        parrafo = request.getParameter("parrafo");
    }
    if (parrafo==null)
    {
        try
        {
            smallerLinks = rb.getString(smallerLinksProperty);
        }
        catch (MissingResourceException e)
        {
            // The links are optional; the text area is then printed on its own.
        }
    }
    if (smallerLinks!=null)
    {
        out.println("<table width=\"100%\">");
        out.println("<tr>");
        out.println("<td>");
    }
    out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
    if (wantsTibetan) out.print(" class=\"tib\"");
    out.println(">");
    if (parrafo!=null)
    {
        // The text comes straight from the request: escape it so that it
        // cannot close the text area and inject markup into the page.
        out.print(escapeHtml(parrafo));
    }
    out.println("</textarea>");
    if (smallerLinks!=null)
    {
        out.println("</td>");
        out.println("<td>");
        out.println(smallerLinks);
        out.println("</td>");
        out.println("</tr>");
        out.println("</table>");
    }
    out.println("</form>");

    // ---- results ----
    if (parrafo != null)
    {
        sl.writeLog("4\t1");
        if (ds!=null && !ds.isEmpty())
        {
            desglosar(parrafo, out, wantsTibetan);
        }
    }
    else sl.writeLog("3\t1");
    out.println(TibetanScanner.copyrightHTML);
    if (useTHDLBanner) out.println("</div><!--END main-->");
    out.println("</body>");
    out.println("</html>");
}

/** Replaces the characters that are significant in HTML text and attribute
    values (&amp;, &lt;, &gt; and the double quote) by their entities. */
private static String escapeHtml(String text)
{
    StringBuffer escaped = new StringBuffer(text.length() + 16);
    for (int i = 0; i < text.length(); i++)
    {
        char c = text.charAt(i);
        switch (c)
        {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
/** Treats a POST request exactly like a GET request by handing it to
    {@link #doGet}; the form printed by doGet submits with POST.
    @param request the incoming HTTP request
    @param response the response the page is written to
*/
public void doPost(HttpServletRequest request, HttpServletResponse response) //throws IOException, ServletException
{
    this.doGet(request, response);
}
/** Scans the submitted text and writes the result to <code>pw</code>: the text
    itself (via printText), then the optional "more links" HTML from the
    resource bundle wrapped in a paragraph, then the table of definitions
    (via printAllDefs). An empty string produces no output at all.
    @param in text to scan; it is passed through Manipulate.NCR2UnicodeString
    and, when guessed to be Unicode or ACIP, converted to Wylie before scanning
    @param pw writer receiving the generated HTML
    @param tibetan forwarded unchanged to printText and printAllDefs
*/
synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan)
{
    if (in.equals("")) return;

    // Start from a clean token list; it is cleared again once the output is written.
    scanner.clearTokens();

    String text = Manipulate.NCR2UnicodeString(in);
    if (Manipulate.guessIfUnicode(text))
    {
        text = BasicTibetanTranscriptionConverter.unicodeToWylie(text);
    }
    else if (Manipulate.guessIfAcip(text))
    {
        text = BasicTibetanTranscriptionConverter.acipToWylie(text);
    }

    scanner.scanBody(text);
    scanner.finishUp();
    printText(pw, tibetan);

    try
    {
        // Looked up before anything is printed, so a missing property prints nothing.
        String moreLinks = rb.getString(moreLinksProperty);
        pw.println("<p>");
        pw.println(moreLinks);
        pw.println("</p>");
    }
    catch (MissingResourceException e)
    {
        // The links are optional.
    }

    printAllDefs(pw, tibetan);
    scanner.clearTokens();
}
/** Writes the scanned tokens as HTML paragraphs. Words are written through
    SwingWord.getLink; punctuation marks are written as their first character
    followed by a space, with a line break starting a new paragraph and
    '&lt;' / '&gt;' replaced by their entities. Other tokens are ignored.
    @param pw writer receiving the HTML
    @param tibetan forwarded to SwingWord.getLink
*/
public void printText(PrintWriter pw, boolean tibetan)
{
    Token tokens[] = scanner.getTokenArray();
    if (tokens == null) return;

    pw.print("<p>");
    for (int idx = 0; idx < tokens.length; idx++)
    {
        Token token = tokens[idx];
        if (token instanceof Word)
        {
            pw.print(new SwingWord((Word) token).getLink(tibetan));
            continue;
        }
        if (!(token instanceof PunctuationMark)) continue;

        char mark = token.toString().charAt(0);
        if (mark == '\n')
        {
            // A line break in the input closes the current paragraph and opens a new one.
            pw.println("</p>");
            pw.print("<p>");
        }
        else if (mark == '<')
        {
            pw.print("&lt; ");
        }
        else if (mark == '>')
        {
            pw.print("&gt; ");
        }
        else
        {
            pw.print(mark + " ");
        }
    }
    pw.println("</p>");
}
/** Writes an HTML table with one group of rows per scanned word: the word's
    bookmark in a cell spanning all of its definitions, and one row per
    definition holding the dictionary tag and the definition text.
    A failure while writing one word is logged and the remaining words are
    still written.
    @param pw writer receiving the HTML
    @param tibetan selects the "tib" CSS class and is forwarded to
    SwingWord.getBookmark
*/
public void printAllDefs(PrintWriter pw, boolean tibetan)
{
    Word words[] = scanner.getWordArray(false);
    if (words == null) return;
    pw.println("<table border=\"1\" width=\"100%\">");
    for (int j = 0; j < words.length; j++)
    {
        // Declared per word so that the error handler below never sees the
        // previous word (or null) when the SwingWord constructor itself fails.
        SwingWord word = null;
        try
        {
            word = new SwingWord(words[j]);
            Definitions defs = word.getDefs();
            DictionarySource ds = defs.getDictionarySource();
            // Reset per word: a source left over from the previous word must
            // never be used to tag this word's definitions.
            ByteDictionarySource sourceb = null;
            int k = 0;
            String tag;
            pw.println(" <tr>");
            if (ds == null)
            {
                tag = "&nbsp;";
            }
            else if (FileSyllableListTree.versionNumber==2)
            {
                tag = ds.getTag(0);
            }
            else
            {
                sourceb = (ByteDictionarySource) ds;
                while (sourceb.isEmpty(k)) k++;
                tag = sourceb.getTag(k);
                k++;
            }
            pw.print(" <td width=\"20%\" rowspan=\"" + defs.def.length
                + "\" valign=\"top\"");
            if (tibetan) pw.print(" class=\"tib\"");
            pw.println(">" + word.getBookmark(tibetan) + "</td>");
            pw.println(" <td width=\"12%\">" + tag + "</td>");
            pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>");
            pw.println(" </tr>");
            for (int i = 1; i < defs.def.length; i++)
            {
                pw.println(" <tr>");
                if (ds == null)
                {
                    // No dictionary source: keep the tag cell empty instead of failing.
                    tag = "&nbsp;";
                }
                else if (sourceb == null)
                {
                    tag = ds.getTag(i);
                }
                else
                {
                    while (sourceb.isEmpty(k)) k++;
                    tag = sourceb.getTag(k);
                    k++;
                }
                pw.println(" <td width=\"12%\">" + tag + "</td>");
                pw.println(" <td width=\"68%\">" + defs.def[i] + "</td>");
                pw.println(" </tr>");
            }
        }
        catch (Exception e)
        {
            // The word is only named in the log when it was actually built.
            if (word != null) sl.writeLog("1\t1\t" + word.getWylie());
            else sl.writeLog("1\t1");
            sl.writeException(e);
        }
    }
    pw.println("</table>");
}
/** Shuts the servlet down: runs the superclass clean-up, clears the logger's
    user IP, writes the "5\t1" log entry and finally destroys the scanner. */
public void destroy()
{
    super.destroy();

    this.sl.setUserIP(null);
    this.sl.writeLog("5\t1");

    this.scanner.destroy();
}
}

View file

@ -1,162 +1,206 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ResourceBundle;
import javax.servlet.GenericServlet;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
/** Running on the server, receives the tibetan text from applet/applications running on
the client and sends them the words with their definitions through the Internet.
Requests are made through {@link RemoteTibetanScanner}.
@author Andr&eacute;s Montano Pellegrini
@see RemoteTibetanScanner
*/
public class RemoteScannerFilter extends GenericServlet
{
private TibetanScanner scanner;
private BitDictionarySource ds;
private ScannerLogger sl;
/** Creates the servlet: loads the "dictionary" resource bundle, creates the
    logger, opens the dictionary named by the bundle and keeps a reference
    to the scanner's dictionary source.
    NOTE(review): a failure to open the dictionary is only logged, so
    <code>scanner</code> stays null and the getDictionarySource() call below
    then throws a NullPointerException.
*/
public RemoteScannerFilter()
{
    ResourceBundle bundle = ResourceBundle.getBundle("dictionary");
    this.sl = new ScannerLogger();

    try
    {
        String dictFileName = bundle.getString("onlinescannerfilter.dict-file-name");
        this.scanner = new LocalTibetanScanner(dictFileName,false);
    }
    catch (Exception e)
    {
        this.sl.writeLog("1\t2");
        this.sl.writeException(e);
    }

    this.ds = this.scanner.getDictionarySource();
    this.sl.writeLog("Creation\t2");
}
/** Answers a request from a remote scanner client as plain text.
    With the parameter dicts=names, one line per dictionary is written
    ("description,tag"). Otherwise a numeric "dicts" parameter is handed to
    the dictionary source, the request body is scanned line by line, and for
    every word that has a definition the Wylie form, the definition and an
    empty line are written.
    Failures are logged; the token list is cleared and the writer closed in
    every case.
    @param req the request; its body holds the text to scan
    @param res the response the result is written to
*/
public void service(ServletRequest req, ServletResponse res) //throws ServletException, IOException
{
    BufferedReader br;
    res.setContentType ("text/plain");
    sl.setUserIP(req.getRemoteAddr());
    Word word = null, words[] = null;
    PrintWriter out;
    try
    {
        out = res.getWriter();
    }
    catch (Exception e)
    {
        sl.writeLog("1\t2");
        sl.writeException(e);
        return;
    }
    int i;
    String linea, dicts = req.getParameter("dicts"), dicDescrip[];
    if (dicts!=null)
    {
        if (dicts.equals("names"))
        {
            sl.writeLog("3\t2");
            dicDescrip = scanner.getDictionaryDescriptions();
            if (dicDescrip==null)
            {
                out.close();
                return;
            }
            for (i=0; i<dicDescrip.length; i++)
            {
                out.println(dicDescrip[i] + "," + DictionarySource.defTags[i]);
            }
            out.close();
            return;
        }
        else
        {
            try
            {
                ds.setDicts(Integer.parseInt(dicts));
            }
            catch (NumberFormatException e)
            {
                // A malformed selection is handled like the other request
                // failures in this method: log it and send an empty answer.
                sl.writeLog("1\t2");
                sl.writeException(e);
                out.close();
                return;
            }
        }
    }
    try
    {
        br = new BufferedReader(new InputStreamReader(req.getInputStream()));
    }
    catch (Exception e)
    {
        sl.writeLog("1\t2");
        sl.writeException(e);
        out.close();
        return;
    }
    /* FIXME: sometimes getDef raises a NullPointerException.
       In the meantime, I'll just keep it from crashing
    */
    sl.writeLog("4\t2");
    try
    {
        scanner.clearTokens();
        while((linea = br.readLine())!= null)
            scanner.scanLine(linea);
        br.close();
        scanner.finishUp();
        words = scanner.getWordArray();
        for (i=0; i<words.length; i++)
        {
            // Remembered so that the error handler can name the failing word.
            word = words[i];
            linea = word.getDef();
            if (linea == null) continue;
            out.println(word.getWylie());
            out.println(linea);
            out.println();
        }
    }
    catch (Exception e)
    {
        // word is still null when the failure happened before the first word
        // was reached; dereferencing it here would throw from the handler.
        if (word != null) sl.writeLog("1\t2\t" + word.getWylie());
        else sl.writeLog("1\t2");
        sl.writeException(e);
    }
    finally
    {
        scanner.clearTokens();
        out.close();
    }
}
/** Shuts the servlet down: runs the superclass clean-up, clears the logger's
    user IP, writes the "5\t2" log entry and finally destroys the scanner. */
public void destroy()
{
    super.destroy();

    this.sl.setUserIP(null);
    this.sl.writeLog("5\t2");

    this.scanner.destroy();
}
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ResourceBundle;
import javax.servlet.GenericServlet;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
/** Running on the server, receives the tibetan text from applet/applications running on
the client and sends them the words with their definitions through the Internet.
Requests are made through {@link RemoteTibetanScanner}.
@author Andr&eacute;s Montano Pellegrini
@see RemoteTibetanScanner
*/
public class RemoteScannerFilter extends GenericServlet
{
private TibetanScanner scanner;
private BitDictionarySource ds;
private ScannerLogger sl;
private static final int INTERNAL = 1;
private static final int JSON = 2;
/** Creates the servlet: forces headless AWT, loads the "dictionary" resource
    bundle, creates the logger, opens the dictionary named by the bundle and
    keeps a reference to the scanner's dictionary source.
    NOTE(review): a failure to open the dictionary is only logged, so
    <code>scanner</code> stays null and the calls on it below then throw a
    NullPointerException.
*/
public RemoteScannerFilter()
{
    System.setProperty("java.awt.headless","true");
    ResourceBundle bundle = ResourceBundle.getBundle("dictionary");
    this.sl = new ScannerLogger();

    try
    {
        String dictFileName = bundle.getString("onlinescannerfilter.dict-file-name");
        this.scanner = new LocalTibetanScanner(dictFileName,false);
    }
    catch (Exception e)
    {
        this.sl.writeLog("1\t2");
        this.sl.writeException(e);
    }

    // The result is deliberately not kept; the call is made before the
    // dictionary source is fetched, exactly as in the original order.
    this.scanner.getDictionaryDescriptions();
    this.ds = this.scanner.getDictionarySource();
    this.sl.writeLog("Creation\t2");
}
/** Answers a request from a remote client in one of two formats.
    <ul>
    <li>Internal (no "jwf" parameter): the request body is scanned line by
    line and, for every word with a definition, the Wylie form, the
    definition and an empty line are written as plain text.</li>
    <li>JSON ("jwf" parameter present): the "text" parameter is scanned and
    the answer is written as <code>jwf({"words":{ "word": ["tag","def",...],
    ... }});</code>.</li>
    </ul>
    With dicts=names, one "description,tag" line per dictionary is written
    instead; any other "dicts" value is parsed as a number and handed to the
    dictionary source.
    Failures are logged; the JSON envelope is closed, the token list cleared
    and the writer closed in every case.
    @param req the request holding the text to scan
    @param res the response the result is written to
*/
public void service(ServletRequest req, ServletResponse res) //throws ServletException, IOException
{
    BufferedReader br;
    int format, i, j, k, emitted = 0;
    try
    {
        req.setCharacterEncoding("UTF8");
    }
    catch(Exception e)
    {
        // Best effort: keep the container's default request encoding.
    }
    String linea, dicts = req.getParameter("dicts"), dicDescrip[], jwf = req.getParameter("jwf"), key, tags[], texts[];
    Definitions defs;
    ByteDictionarySource dict_source;
    if (jwf!=null) format = JSON;
    else format = INTERNAL;
    switch (format)
    {
        case INTERNAL:
            res.setContentType ("text/plain");
            break;
        case JSON:
            res.setContentType ("text/x-json");
    }
    sl.setUserIP(req.getRemoteAddr());
    Word word = null, words[] = null;
    PrintWriter out;
    try
    {
        out = res.getWriter();
    }
    catch (Exception e)
    {
        sl.writeLog("1\t2");
        sl.writeException(e);
        return;
    }
    if (dicts!=null)
    {
        if (dicts.equals("names"))
        {
            sl.writeLog("3\t2");
            dicDescrip = scanner.getDictionaryDescriptions();
            if (dicDescrip==null)
            {
                out.close();
                return;
            }
            for (i=0; i<dicDescrip.length; i++)
            {
                out.println(dicDescrip[i] + "," + DictionarySource.defTags[i]);
            }
            out.close();
            return;
        }
        else
        {
            try
            {
                ds.setDicts(Integer.parseInt(dicts));
            }
            catch (NumberFormatException e)
            {
                // A malformed selection is handled like a writer failure:
                // log it and send an empty answer.
                sl.writeLog("1\t2");
                sl.writeException(e);
                out.close();
                return;
            }
        }
    }
    if (format==JSON)
    {
        // NOTE(review): the callback name is echoed exactly as received from
        // the request; consider restricting it to identifier characters.
        out.println(jwf + "({\"words\":{");
    }
    try
    {
        scanner.clearTokens();
        switch (format)
        {
            case INTERNAL:
                br = req.getReader();
                sl.writeLog("4\t2");
                while((linea = br.readLine())!= null)
                    scanner.scanLine(linea);
                br.close();
                break;
            case JSON:
                linea = req.getParameter("text");
                // Without a "text" parameter there is nothing to scan.
                if (linea != null)
                {
                    linea = Manipulate.NCR2UnicodeString(linea);
                    if (Manipulate.guessIfUnicode(linea)) linea = BasicTibetanTranscriptionConverter.unicodeToWylie(linea);
                    else if (Manipulate.guessIfAcip(linea)) linea = BasicTibetanTranscriptionConverter.acipToWylie(linea);
                    scanner.scanLine(linea);
                }
        }
        scanner.finishUp();
        words = scanner.getWordArray();
        for (i=0; words!=null && i<words.length; i++)
        {
            // Remembered so that the error handler can name the failing word.
            word = words[i];
            linea = word.getDef();
            if (linea == null) continue;
            switch (format)
            {
                case INTERNAL:
                    out.println(word.getWylie());
                    out.println(linea);
                    out.println();
                    break;
                case JSON:
                    // Collect everything that can fail before printing, so an
                    // entry is written either completely or not at all.
                    key = BasicTibetanTranscriptionConverter.wylieToHTMLUnicode(word.token);
                    defs = word.getDefs();
                    dict_source = (ByteDictionarySource)defs.getDictionarySource();
                    tags = new String[defs.def.length];
                    texts = new String[defs.def.length];
                    k=0;
                    for (j=0; j<defs.def.length; j++)
                    {
                        while (dict_source.isEmpty(k)) k++;
                        tags[j] = dict_source.getTag(k);
                        k++;
                        texts[j] = Manipulate.toJSON(defs.def[j]);
                    }
                    // The separator is written in front of every entry but the
                    // first, so skipped words never leave a trailing comma.
                    if (emitted>0) out.println(",");
                    out.println("\"" + key + "\": [");
                    for (j=0; j<tags.length; j++)
                    {
                        out.println("\"" + tags[j] + "\",");
                        out.print("\"" + texts[j] + "\"");
                        if (j==tags.length-1) out.println();
                        else out.println(",");
                    }
                    out.print("]");
                    emitted++;
            }
        }
    }
    catch (Exception e)
    {
        // word is still null when the failure happened before the first word
        // was reached; dereferencing it here would throw from the handler.
        if (word != null) sl.writeLog("1\t2\t" + word.getWylie());
        else sl.writeLog("1\t2");
        sl.writeException(e);
    }
    finally
    {
        // Close the envelope even after a failure so the client still
        // receives well-formed output with the entries written so far.
        if (format==JSON) out.println("}});");
        scanner.clearTokens();
        out.close();
    }
}
/** Shuts the servlet down: runs the superclass clean-up, clears the logger's
    user IP, writes the "5\t2" log entry and finally destroys the scanner. */
public void destroy()
{
    super.destroy();

    this.sl.setUserIP(null);
    this.sl.writeLog("5\t2");

    this.scanner.destroy();
}
}

View file

@ -1,86 +1,93 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Calendar;
import java.util.ResourceBundle;
/** Designed to keep a log of the transactions taking place in the
servlet version of the translation tool.
@author Andr&eacute;s Montano Pellegrini
*/
public class ScannerLogger
{
private String fileName;
private String lastIP;
/** Creates a logger that appends to the file named by the
    "remotescannerfilter.log-file-name" property of the "dictionary"
    resource bundle. No user IP is set initially. */
public ScannerLogger()
{
    this.fileName = ResourceBundle.getBundle("dictionary")
        .getString("remotescannerfilter.log-file-name");
    this.lastIP = null;
}
/** Returns the current local time as six tab-separated numbers: year, month,
    day of month, hour of day, minute and second. The month is the raw
    Calendar.MONTH value, which java.util.Calendar counts from 0 (January).
    @return the tab-separated time stamp
*/
public String getCurrentTime()
{
    Calendar now = Calendar.getInstance();
    int fields[] = {
        Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
        Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND
    };
    StringBuffer stamp = new StringBuffer();
    for (int i = 0; i < fields.length; i++)
    {
        if (i > 0) stamp.append("\t");
        stamp.append(Integer.toString(now.get(fields[i])));
    }
    return stamp.toString();
}
/** Sets the IP address that writeLog puts at the start of each entry.
    @param lastIP the address to log, or <code>null</code> to have writeLog
    print "-" instead
*/
public void setUserIP(String lastIP)
{
this.lastIP = lastIP;
}
/** Appends one entry to the log file: the user IP (or "-" when none is set),
    the current time and the given text, separated by tabs.
    Nothing is written when the log file cannot be opened.
    @param s the text of the entry
*/
synchronized public void writeLog(String s)
{
    PrintStream pw = getPrintStream();
    // getPrintStream() returns null (after printing the stack trace) when the
    // file cannot be opened; logging must never break the caller.
    if (pw == null) return;
    try
    {
        if (lastIP!=null) pw.print(lastIP);
        else pw.print("-");
        pw.println("\t" + getCurrentTime() + "\t" + s);
        pw.flush();
    }
    finally
    {
        pw.close();
    }
}
/** Opens the log file for appending.
    @return a stream on the log file, or <code>null</code> when it cannot be
    opened (the stack trace is then printed to standard error)
*/
private PrintStream getPrintStream()
{
    try
    {
        return new PrintStream(new FileOutputStream(fileName, true));
    }
    catch (Exception e)
    {
        e.printStackTrace();
        return null;
    }
}
/** Appends the stack trace of the given exception to the log file.
    Nothing is written when the log file cannot be opened.
    @param e the exception to record
*/
synchronized public void writeException(Exception e)
{
    PrintStream pw = getPrintStream();
    // getPrintStream() returns null (after printing the stack trace) when the
    // file cannot be opened; logging must never break the caller.
    if (pw == null) return;
    try
    {
        e.printStackTrace(pw);
        pw.flush();
    }
    finally
    {
        pw.close();
    }
}
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Calendar;
import java.util.ResourceBundle;
/** Designed to keep a log of the transactions taking place in the
servlet version of the translation tool.
@author Andr&eacute;s Montano Pellegrini
*/
public class ScannerLogger
{
    /** Name of the file that entries are appended to. */
    private String fileName;
    /** IP address written at the start of each entry; null prints "-". */
    private String lastIP;
    /** When false, writeLog and writeException do nothing. */
    private boolean enabled;

    /** Reads the configuration from the "dictionary" resource bundle: the log
        file name ("remotescannerfilter.log-file-name") and whether logging is
        switched on ("remotescannerfilter.logging-enabled" set to "yes",
        case-insensitive). Logging stays off when the latter is absent. */
    public ScannerLogger()
    {
        String temp;
        ResourceBundle rb = ResourceBundle.getBundle("dictionary");
        fileName = rb.getString("remotescannerfilter.log-file-name");
        try
        {
            temp = rb.getString("remotescannerfilter.logging-enabled");
        }
        catch (java.util.MissingResourceException e)
        {
            // getString() reports a missing key by throwing, not by
            // returning null; a missing switch simply means "disabled".
            temp = null;
        }
        enabled = (temp != null) && "yes".equalsIgnoreCase(temp.trim());
        lastIP = null;
    }

    /** Returns the current local time as six tab-separated numbers: year,
        month, day of month, hour of day, minute and second. The month is the
        raw Calendar.MONTH value, which java.util.Calendar counts from 0
        (January); it is left as is so existing logs stay comparable.
        @return the tab-separated time stamp
    */
    public String getCurrentTime()
    {
        Calendar rightNow = Calendar.getInstance();
        return Integer.toString(rightNow.get(Calendar.YEAR)) + "\t" + Integer.toString(rightNow.get(Calendar.MONTH)) + "\t" + Integer.toString(rightNow.get(Calendar.DAY_OF_MONTH)) + "\t" + Integer.toString(rightNow.get(Calendar.HOUR_OF_DAY)) + "\t" + Integer.toString(rightNow.get(Calendar.MINUTE)) + "\t" + Integer.toString(rightNow.get(Calendar.SECOND));
    }

    /** Sets the IP address that writeLog puts at the start of each entry.
        @param lastIP the address to log, or <code>null</code> to print "-"
    */
    public void setUserIP(String lastIP)
    {
        this.lastIP = lastIP;
    }

    /** Appends one entry to the log file: the user IP (or "-"), the current
        time and the given text, separated by tabs. Does nothing when logging
        is disabled or the log file cannot be opened.
        @param s the text of the entry
    */
    synchronized public void writeLog(String s)
    {
        if (!enabled) return;
        PrintStream pw = getPrintStream();
        // Logging must never break the caller: skip the entry when the file
        // cannot be opened (getPrintStream has already reported why).
        if (pw == null) return;
        try
        {
            if (lastIP!=null) pw.print(lastIP);
            else pw.print("-");
            pw.println("\t" + getCurrentTime() + "\t" + s);
            pw.flush();
        }
        finally
        {
            pw.close();
        }
    }

    /** Opens the log file for appending.
        @return a stream on the log file, or <code>null</code> when it cannot
        be opened (the stack trace is then printed to standard error)
    */
    private PrintStream getPrintStream()
    {
        PrintStream pw;
        try
        {
            pw = new PrintStream(new FileOutputStream(fileName, true));
            return pw;
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        return null;
    }

    /** Appends the stack trace of the given exception to the log file. Does
        nothing when logging is disabled or the log file cannot be opened.
        @param e the exception to record
    */
    synchronized public void writeException(Exception e)
    {
        if (!enabled) return;
        PrintStream pw = getPrintStream();
        if (pw == null) return;
        try
        {
            e.printStackTrace(pw);
            pw.flush();
        }
        finally
        {
            pw.close();
        }
    }
}

View file

@ -1,257 +1,254 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import org.thdl.util.SimplifiedLinkedList;
import org.thdl.util.SimplifiedListIterator;
import org.thdl.util.ThdlVersion;
/** Defines the core methods required to provide access to a dictionary, whether local or remote.
@author Andr&eacute;s Montano Pellegrini
*/
public abstract class TibetanScanner
{
/** Product name and version banner, including the compilation time reported by ThdlVersion. */
public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
/** Copyright notice using the Unicode copyright sign and an accented "e". */
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2006 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
/** Copyright notice restricted to ASCII characters. */
public static final String copyrightASCII="Copyright 2000-2006 by Andres Montano Pellegrini, all rights reserved.";
/** Copyright notice as an HTML fragment; it starts with a rule and embeds the version banner. */
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2006 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";
/** Value of {@link #mode} for normal operation. */
public static final int NORMAL_MODE=1;
/** Value of {@link #mode} for debugging. */
public static final int DEBUG_MODE=2;
/** Current operating mode, shared by all scanners; one of NORMAL_MODE or DEBUG_MODE. */
public static int mode;
static
{
    // scanners start out in normal mode
    mode = NORMAL_MODE;
}
/**
 * HTML "about" text for Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary:
 * credits, the author's apology and the key to the source abbreviations.
 * Every fragment that is continued on the next line must end with a space
 * (or markup), otherwise the words run together in the concatenated text.
 */
public static final String aboutTomeraider=
    "Welcome to Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary version 2.0.0!<p>\n" +
    "This file was automatically generated using software developed by Andres Montano Pellegrini. " +
    "For more information, see http://www.people.virginia.edu/~am2zb/tibetan .<p>" +
    "<b>Formulator and Editor</b>: Jeffrey Hopkins<br>\n" +
    "<b>Contributors</b>: Joe Wilson, Craig Preston, John Powers, Nathanial Garson, " +
    "Paul Hackett, Andres Montano<p>" +
    "A project of the Tibetan Studies Institute, Boonesville, Virginia, and the " +
    "University of Virginia Tibetan Studies Program<p>" +
    "<i>\u00A9 Jeffrey Hopkins 1992.</i><p>" +
    "<b>Apology</b><p>" +
    "This is a work in progress in crude form that is being shared with students " +
    "of the Tibetan language mainly in order to receive input for further " +
    "development. The English translations of the entries can be said only to " +
    "represent what contributors, over a span of over thirty years, thought were " +
    "my current translations. A small number are simply wrong; others need to be " +
    "updated; and all will receive much more attention and, hence, detail.<p>\n" +
    "The Dictionary has been entered into a database with fields for the entry, " +
    "Sanskrit, tenses, my English, a few others interests, examples, " +
    "definition, divisions, and comments. At this point, very few entries " +
    "contain all of these items, but the plan is provide these, where " +
    "appropriate, over the years. Translations for entries that have arisen from " +
    "my work and from interactions with my students are in boldface, whereas " +
    "those from other works are in regular type on separate lines and are marked " +
    "with an initial at the end of the line. A key to these markings is given on " +
    "the next page.<p>\n" +
    "(Please note that the radical signs for Sanskrit roots are, after the first " +
    "letter of the alphabet, in a state of disarray.)<p>\n" +
    "I hope that you will bear with the many inadequacies of this first release.<p>\n" +
    "Paul Jeffrey Hopkins<br>\n" +
    "Professor of Tibetan Studies<p>\n" +
    "<b>Abbreviations</b><p>\n" +
    "B-7: ??? {PH: see dngos med ... & dngos po (synonyms) }<p>\n" +
    "BJ: Bel-jor-hlun-drup (Dpal \'byor lhun grub). Legs bshad snying po\'i dka' " +
    "\'grel bstan pa\'i sgron me (Buxaduar: Sera Monastery, 1968).<p>\n" +
    "BK: ??? {PH: see bka\' (examples) }<p>\n" +
    "BR: Losang Gyatso (Blo bzang rgya mtsho). Presentation of Knowledge and " +
    "Awareness (Blo rig).<p>\n" +
    "BWT: Ngak-wang-bel-den (Ngag dbang dpal ldan). Annotations for " +
    "[Jam-yang-shay-ba\'s] \"Tenets\" (Grub mtha\' chen mo\'i mchan).<p>\n" +
    "C: Conze, Edward. Materials for a Dictionary of the Prajnaparamita " +
    "Literature (Tokyo: Suzuki Research Foundation, 1967).<p>\n" +
    "col.: colloquial<p>\n" +
    "D1: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
    "(Part 1: Bsdus grwa chung ngu).<p>\n" +
    "D2: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
    "(Part 2: Bsdus grwa \'bring).<p>\n" +
    "DASI: Decisive Analysis of Special Insight.<p>\n" +
    "DG: Germano, David. Poetic Thought, the Intelligent Universe, and the " +
    "Mystery of Self: the Tantric Synthesis of rDzogs Chen in Fourteenth Century " +
    "Tibet. (Ph.d. dissertation, University of Wisconsin, Madison,WI 1992).<p>\n" +
    "DK: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Drang ba dang nges pa\'i " +
    "don rnam par phye ba'i bstan bcos legs bshad snying po (Sarnath: Pleasure of " +
    "Elegant Sayings Press, 1979).<p>\n" +
    "Ganden Triba: Oral commentary of Ganden Triba Jam-bel-shen-pen.<p>\n" +
    "GCT: Ngak-wang-dra-shi (Ngag dbang bkra shis). Collected Topics by a " +
    "Spiritual Son of Jam-yang-shay-ba (Sgo mang sras bsdus grwa).<p>\n" +
    "GD: Dreyfus, George. Ontology, Philosophy of Language, and Epistemology in " +
    "Buddhist Tradition (Ph.d. dissertation. Religious Studies, University of " +
    "Virginia, Charlottesville,VA 1991).<p>\n" +
    "Gon-chok: Gon-chok-jik-may-wang-bo (Dkon mchog \'jigs med dbang po). " +
    "Precious Garland of Tenets (Grub mtha\' rin chen phreng ba).<p>\n" +
    "Jang.: Jang-gya (Lcang skya rol pa\'i rdo rje). " +
    "Presentation of Tenets (Lcang skya grub mtha').<p>\n" +
    "JKA: ??? {PH: see mngon sum (definition) } <p>\n" +
    "KS: Khetsun Sangpo, Biographical Dictionary of Tibet and Tibetan Buddhism. " +
    "(LTWA: Dharamsala, HP)<p>\n" +
    "L: Lamotte, Etienne. Samdhinirmocana-sutra " +
    "(Louvain: Universite de Louvain, 1935).<p>\n" +
    "LAK: Jam-bel-sam-pel (\'Jam dpal bsam phel). Presentation of Awareness and " +
    "Knowledge (Blo rig gi rnam bzhag).<p>\n" +
    "Lati: Oral commentary by Lati Rinbochay.<p>\n" +
    "LCh: Chandra, Lokesh. Tibetan-Sanskrit Dictionary (New Delhi, 1987).<p>\n" +
    "LG: Losang Gyatso\'s Blo rig.<p>\n" +
    "LM: ??? {PH: see skye bu chung ngu ... }<p>\n" +
    "LR: Hopkins, Jeffrey. Glossary for Gsung rab kun gyi snying po lam rim gyi " +
    "gtso bo rnam pa gsung gi khrid yid gzhan phan snying po (by Panchen Lama IV).<p>\n" +
    "LSR: Tsul-trim-nam-gyel (Tshul khrims rnam rgyal). Presentation of Signs " +
    "and Reasonings (Rtags rigs kyi rnam bzhag).<p>\n" +
    "LWT: Lo-sang-gon-chok (Blo bzang dkon mchog). Word Commentary on the Root " +
    "Text of [Jam-yang-shay-ba\'s] \"Tenets\".<p>\n" +
    "ME: Hopkins, Jeffrey. Meditation on Emptiness (London, Wisdom, 1983).<p>\n" +
    "MGP: ??? {PH: see bkag (examples) }<p>\n" +
    "MSA: Nagao, Gadjin. Index to the Mahayanasutralankara (Tokyo: Nippon " +
    "Gakujutsu Shinkvo-kai, 1958).<p>\n" +
    "MSI: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Middling Exposition of " +
    "Special Insight (Lhag mthong \'bring).<p>\n" +
    "MV: Nagao, Gadjin. Index to the Madhyanta-vibhaga (Tokyo: 1961).<p>\n" +
    "N: Zuiryu NAKAMURA. Index to the Ratnagotravibhaga-mahayanottaratantra-sastra " +
    "(Tokyo, 1961).<p>\n" +
    "P: Peking edition of the Tripitaka.<p>\n" +
    "PGP: Lo-sang-da-yang (Blo bzang rta dbyangs). Presentation of the Grounds " +
    "and Paths in Prasangika (Thal \'gyur pa\'i sa lam).<p>\n" +
    "PP: Candrakirti. Prasannapada.<p>\n" +
    "S: Samdhinirmocana-sutra (Tok Palace version, 160 pp., Leh, Ladakh: Shesrig " +
    "Dpemzod, 1975-1980, vol. ja).<p>\n" +
    "TAK: Pur-bu-jok (Phur bu lcog). Explanation of the Presentation of Objects " +
    "and Object-Possessors as Well as Awareness and Knowledge (Yul dang yul can " +
    "dang blo rig).<p>\n" +
    "TCT: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics (Yongs " +
    "\'dzin bsdus grwa).<p>\n" +
    "TGP: Nga-wang-bel-den (Ngag dbang dpal ldan). Treatise Illuminating the " +
    "Presentation of the Four Great Secret Tantra Sets (Sngags kyi sa lam).<p>\n" +
    "TN: Vasubandhu. Trisvabhavanirdesha.<p>\n" +
    "VM: Bu-don-rin-chen-drup (bu ston rin chen grub), The Practice of " +
    "(Jnandagarbha\'s) \"The Rite of the Vajra-Element Mandala: The Source of All " +
    "Vajras\": A Precious Enhancer of Thought (rDo rje dbyings kyi dkyil \'khor gyi " +
    "cho ga rdo rje thams cad \'byung ba zhes bya ba\'i lag len rin chen bsam \'phel), " +
    "in Collected Works, Part 12 na. Lhasa: Zhol Printing House, 1990.<p>\n" +
    "Y: Susumi YAMAGUCHI.Index to the Prasannapada Madhyamakavrtti. " +
    "(Kyoto: Heirakuji-Shoten, 1974).<p>\n" +
    "YT: Oral commentary by Yeshi Thupten.";
/** Tokens accumulated by this scanner; replaced with a fresh list by clearTokens(). */
protected SimplifiedLinkedList wordList;

/** Creates a scanner that starts with an empty token list. */
public TibetanScanner()
{
    this.wordList = new SimplifiedLinkedList();
}
/**
 * Forgets all accumulated tokens by installing a fresh, empty list.
 * The previous list object itself is not modified.
 */
public void clearTokens()
{
    this.wordList = new SimplifiedLinkedList();
}
/**
 * Copies the accumulated tokens into an array. The array is filled from the
 * last slot down to the first, i.e. in the reverse of the list's iteration
 * order.
 *
 * @return the tokens, or null when no tokens have been accumulated
 */
public Token[] getTokenArray()
{
    final int count = wordList.size();
    if (count == 0)
    {
        return null;
    }
    final Token[] tokens = new Token[count];
    int slot = count;
    for (SimplifiedListIterator it = wordList.listIterator(); it.hasNext(); )
    {
        slot--;
        tokens[slot] = (Token) it.next();
    }
    return tokens;
}
/**
 * Gives direct access to the token list.
 *
 * @return the scanner's own list object (not a copy)
 */
public SimplifiedLinkedList getTokenLinkedList()
{
    return this.wordList;
}
/**
 * Returns every accumulated token that is a Word, repetitions included.
 *
 * @return the words, or null when there are none
 */
public Word[] getWordArray()
{
    return getWordArray(true);
}

/**
 * Returns the accumulated tokens that are Words.
 *
 * NOTE(review): with repetitions the array follows the iteration order of
 * the filtered list, whereas without repetitions it is filled back to front.
 * Whether both end up in the same logical order depends on how
 * SimplifiedLinkedList.addLast/listIterator behave - confirm.
 *
 * @param includeRepeated true to keep duplicates; false to keep only the
 *        first occurrence of each word (as decided by
 *        SimplifiedLinkedList.contains)
 * @return the words, or null when there are none
 */
public Word[] getWordArray(boolean includeRepeated)
{
    // Step 1: filter the token list down to Word instances.
    final SimplifiedLinkedList words = new SimplifiedLinkedList();
    SimplifiedListIterator it = wordList.listIterator();
    while (it.hasNext())
    {
        final Token current = (Token) it.next();
        if (current instanceof Word)
        {
            words.addLast(current);
        }
    }

    final Word[] result;
    if (includeRepeated)
    {
        final int total = words.size();
        if (total == 0)
        {
            return null;
        }
        result = new Word[total];
        int next = 0;
        for (it = words.listIterator(); it.hasNext(); next++)
        {
            result[next] = (Word) it.next();
        }
    }
    else
    {
        // Step 2: drop words that were already seen.
        final SimplifiedLinkedList unique = new SimplifiedLinkedList();
        for (it = words.listIterator(); it.hasNext(); )
        {
            final Word candidate = (Word) it.next();
            if (unique.contains(candidate))
            {
                continue;
            }
            unique.addLast(candidate);
        }
        int slot = unique.size();
        if (slot == 0)
        {
            return null;
        }
        result = new Word[slot];
        // filled from the last slot towards the first
        for (it = unique.listIterator(); it.hasNext(); )
        {
            slot--;
            result[slot] = (Word) it.next();
        }
    }
    return result;
}
/** Scans one line of text ("linea" is Spanish for "line").
    NOTE(review): presumably the resulting tokens are accumulated in wordList - confirm against the implementations. */
public abstract void scanLine(String linea);
/** Scans a body of text.
    NOTE(review): how this differs from scanLine is not visible here - check the implementations. */
public abstract void scanBody(String linea);
/** Hook for completing a scan.
    NOTE(review): presumably flushes pending input into tokens - confirm against the implementations. */
public abstract void finishUp();
/** Returns the dictionary source object associated with this scanner. */
public abstract BitDictionarySource getDictionarySource();
/** Returns descriptions of the dictionaries.
    NOTE(review): presumably one entry per available dictionary - confirm. */
public abstract String[] getDictionaryDescriptions();
/** Hook for shutting the scanner down.
    NOTE(review): presumably releases resources held by the implementation - confirm. */
public abstract void destroy();
}
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import org.thdl.util.SimplifiedLinkedList;
import org.thdl.util.SimplifiedListIterator;
import org.thdl.util.ThdlVersion;
/** Defines the core methods required to provide access to a dictionary, whether local or remote.
@author Andr&eacute;s Montano Pellegrini
*/
public abstract class TibetanScanner
{
public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2009 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2009 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2009 by <a href=\"http://www.gaugeus.com/ramblings\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a> All rights reserved.</strong></small>";
public static final int NORMAL_MODE=1;
public static final int DEBUG_MODE=2;
public static int mode;
static
{
mode = NORMAL_MODE;
}
/**
 * HTML "about" text for Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary:
 * credits, the author's apology and the key to the source abbreviations.
 * Every fragment that is continued on the next line must end with a space
 * (or markup), otherwise the words run together in the concatenated text.
 */
public static final String aboutTomeraider=
    "Welcome to Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary version 2.0.0!<p>\n" +
    "This file was automatically generated using software developed by Andres Montano Pellegrini. " +
    "For more information, see http://www.people.virginia.edu/~am2zb/tibetan .<p>" +
    "<b>Formulator and Editor</b>: Jeffrey Hopkins<br>\n" +
    "<b>Contributors</b>: Joe Wilson, Craig Preston, John Powers, Nathanial Garson, " +
    "Paul Hackett, Andres Montano<p>" +
    "A project of the Tibetan Studies Institute, Boonesville, Virginia, and the " +
    "University of Virginia Tibetan Studies Program<p>" +
    "<i>\u00A9 Jeffrey Hopkins 1992.</i><p>" +
    "<b>Apology</b><p>" +
    "This is a work in progress in crude form that is being shared with students " +
    "of the Tibetan language mainly in order to receive input for further " +
    "development. The English translations of the entries can be said only to " +
    "represent what contributors, over a span of over thirty years, thought were " +
    "my current translations. A small number are simply wrong; others need to be " +
    "updated; and all will receive much more attention and, hence, detail.<p>\n" +
    "The Dictionary has been entered into a database with fields for the entry, " +
    "Sanskrit, tenses, my English, a few others interests, examples, " +
    "definition, divisions, and comments. At this point, very few entries " +
    "contain all of these items, but the plan is provide these, where " +
    "appropriate, over the years. Translations for entries that have arisen from " +
    "my work and from interactions with my students are in boldface, whereas " +
    "those from other works are in regular type on separate lines and are marked " +
    "with an initial at the end of the line. A key to these markings is given on " +
    "the next page.<p>\n" +
    "(Please note that the radical signs for Sanskrit roots are, after the first " +
    "letter of the alphabet, in a state of disarray.)<p>\n" +
    "I hope that you will bear with the many inadequacies of this first release.<p>\n" +
    "Paul Jeffrey Hopkins<br>\n" +
    "Professor of Tibetan Studies<p>\n" +
    "<b>Abbreviations</b><p>\n" +
    "B-7: ??? {PH: see dngos med ... & dngos po (synonyms) }<p>\n" +
    "BJ: Bel-jor-hlun-drup (Dpal \'byor lhun grub). Legs bshad snying po\'i dka' " +
    "\'grel bstan pa\'i sgron me (Buxaduar: Sera Monastery, 1968).<p>\n" +
    "BK: ??? {PH: see bka\' (examples) }<p>\n" +
    "BR: Losang Gyatso (Blo bzang rgya mtsho). Presentation of Knowledge and " +
    "Awareness (Blo rig).<p>\n" +
    "BWT: Ngak-wang-bel-den (Ngag dbang dpal ldan). Annotations for " +
    "[Jam-yang-shay-ba\'s] \"Tenets\" (Grub mtha\' chen mo\'i mchan).<p>\n" +
    "C: Conze, Edward. Materials for a Dictionary of the Prajnaparamita " +
    "Literature (Tokyo: Suzuki Research Foundation, 1967).<p>\n" +
    "col.: colloquial<p>\n" +
    "D1: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
    "(Part 1: Bsdus grwa chung ngu).<p>\n" +
    "D2: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
    "(Part 2: Bsdus grwa \'bring).<p>\n" +
    "DASI: Decisive Analysis of Special Insight.<p>\n" +
    "DG: Germano, David. Poetic Thought, the Intelligent Universe, and the " +
    "Mystery of Self: the Tantric Synthesis of rDzogs Chen in Fourteenth Century " +
    "Tibet. (Ph.d. dissertation, University of Wisconsin, Madison,WI 1992).<p>\n" +
    "DK: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Drang ba dang nges pa\'i " +
    "don rnam par phye ba'i bstan bcos legs bshad snying po (Sarnath: Pleasure of " +
    "Elegant Sayings Press, 1979).<p>\n" +
    "Ganden Triba: Oral commentary of Ganden Triba Jam-bel-shen-pen.<p>\n" +
    "GCT: Ngak-wang-dra-shi (Ngag dbang bkra shis). Collected Topics by a " +
    "Spiritual Son of Jam-yang-shay-ba (Sgo mang sras bsdus grwa).<p>\n" +
    "GD: Dreyfus, George. Ontology, Philosophy of Language, and Epistemology in " +
    "Buddhist Tradition (Ph.d. dissertation. Religious Studies, University of " +
    "Virginia, Charlottesville,VA 1991).<p>\n" +
    "Gon-chok: Gon-chok-jik-may-wang-bo (Dkon mchog \'jigs med dbang po). " +
    "Precious Garland of Tenets (Grub mtha\' rin chen phreng ba).<p>\n" +
    "Jang.: Jang-gya (Lcang skya rol pa\'i rdo rje). " +
    "Presentation of Tenets (Lcang skya grub mtha').<p>\n" +
    "JKA: ??? {PH: see mngon sum (definition) } <p>\n" +
    "KS: Khetsun Sangpo, Biographical Dictionary of Tibet and Tibetan Buddhism. " +
    "(LTWA: Dharamsala, HP)<p>\n" +
    "L: Lamotte, Etienne. Samdhinirmocana-sutra " +
    "(Louvain: Universite de Louvain, 1935).<p>\n" +
    "LAK: Jam-bel-sam-pel (\'Jam dpal bsam phel). Presentation of Awareness and " +
    "Knowledge (Blo rig gi rnam bzhag).<p>\n" +
    "Lati: Oral commentary by Lati Rinbochay.<p>\n" +
    "LCh: Chandra, Lokesh. Tibetan-Sanskrit Dictionary (New Delhi, 1987).<p>\n" +
    "LG: Losang Gyatso\'s Blo rig.<p>\n" +
    "LM: ??? {PH: see skye bu chung ngu ... }<p>\n" +
    "LR: Hopkins, Jeffrey. Glossary for Gsung rab kun gyi snying po lam rim gyi " +
    "gtso bo rnam pa gsung gi khrid yid gzhan phan snying po (by Panchen Lama IV).<p>\n" +
    "LSR: Tsul-trim-nam-gyel (Tshul khrims rnam rgyal). Presentation of Signs " +
    "and Reasonings (Rtags rigs kyi rnam bzhag).<p>\n" +
    "LWT: Lo-sang-gon-chok (Blo bzang dkon mchog). Word Commentary on the Root " +
    "Text of [Jam-yang-shay-ba\'s] \"Tenets\".<p>\n" +
    "ME: Hopkins, Jeffrey. Meditation on Emptiness (London, Wisdom, 1983).<p>\n" +
    "MGP: ??? {PH: see bkag (examples) }<p>\n" +
    "MSA: Nagao, Gadjin. Index to the Mahayanasutralankara (Tokyo: Nippon " +
    "Gakujutsu Shinkvo-kai, 1958).<p>\n" +
    "MSI: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Middling Exposition of " +
    "Special Insight (Lhag mthong \'bring).<p>\n" +
    "MV: Nagao, Gadjin. Index to the Madhyanta-vibhaga (Tokyo: 1961).<p>\n" +
    "N: Zuiryu NAKAMURA. Index to the Ratnagotravibhaga-mahayanottaratantra-sastra " +
    "(Tokyo, 1961).<p>\n" +
    "P: Peking edition of the Tripitaka.<p>\n" +
    "PGP: Lo-sang-da-yang (Blo bzang rta dbyangs). Presentation of the Grounds " +
    "and Paths in Prasangika (Thal \'gyur pa\'i sa lam).<p>\n" +
    "PP: Candrakirti. Prasannapada.<p>\n" +
    "S: Samdhinirmocana-sutra (Tok Palace version, 160 pp., Leh, Ladakh: Shesrig " +
    "Dpemzod, 1975-1980, vol. ja).<p>\n" +
    "TAK: Pur-bu-jok (Phur bu lcog). Explanation of the Presentation of Objects " +
    "and Object-Possessors as Well as Awareness and Knowledge (Yul dang yul can " +
    "dang blo rig).<p>\n" +
    "TCT: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics (Yongs " +
    "\'dzin bsdus grwa).<p>\n" +
    "TGP: Nga-wang-bel-den (Ngag dbang dpal ldan). Treatise Illuminating the " +
    "Presentation of the Four Great Secret Tantra Sets (Sngags kyi sa lam).<p>\n" +
    "TN: Vasubandhu. Trisvabhavanirdesha.<p>\n" +
    "VM: Bu-don-rin-chen-drup (bu ston rin chen grub), The Practice of " +
    "(Jnandagarbha\'s) \"The Rite of the Vajra-Element Mandala: The Source of All " +
    "Vajras\": A Precious Enhancer of Thought (rDo rje dbyings kyi dkyil \'khor gyi " +
    "cho ga rdo rje thams cad \'byung ba zhes bya ba\'i lag len rin chen bsam \'phel), " +
    "in Collected Works, Part 12 na. Lhasa: Zhol Printing House, 1990.<p>\n" +
    "Y: Susumi YAMAGUCHI.Index to the Prasannapada Madhyamakavrtti. " +
    "(Kyoto: Heirakuji-Shoten, 1974).<p>\n" +
    "YT: Oral commentary by Yeshi Thupten.";
/** Tokens accumulated by this scanner; replaced with a fresh list by clearTokens(). */
protected SimplifiedLinkedList wordList;

/** Creates a scanner that starts with an empty token list. */
public TibetanScanner()
{
    this.wordList = new SimplifiedLinkedList();
}
/**
 * Forgets all accumulated tokens by installing a fresh, empty list.
 * The previous list object itself is not modified.
 */
public void clearTokens()
{
    this.wordList = new SimplifiedLinkedList();
}
/**
 * Copies the accumulated tokens into an array. The array is filled from the
 * last slot down to the first, i.e. in the reverse of the list's iteration
 * order.
 *
 * @return the tokens, or null when no tokens have been accumulated
 */
public Token[] getTokenArray()
{
    final int count = wordList.size();
    if (count == 0)
    {
        return null;
    }
    final Token[] tokens = new Token[count];
    int slot = count;
    for (SimplifiedListIterator it = wordList.listIterator(); it.hasNext(); )
    {
        slot--;
        tokens[slot] = (Token) it.next();
    }
    return tokens;
}
/**
 * Gives direct access to the token list.
 *
 * @return the scanner's own list object (not a copy)
 */
public SimplifiedLinkedList getTokenLinkedList()
{
    return this.wordList;
}
/**
 * Returns every accumulated token that is a Word, repetitions included.
 *
 * @return the words, or null when there are none
 */
public Word[] getWordArray()
{
    return getWordArray(true);
}

/**
 * Returns the accumulated tokens that are Words.
 *
 * NOTE(review): with repetitions the array follows the iteration order of
 * the filtered list, whereas without repetitions it is filled back to front.
 * Whether both end up in the same logical order depends on how
 * SimplifiedLinkedList.addLast/listIterator behave - confirm.
 *
 * @param includeRepeated true to keep duplicates; false to keep only the
 *        first occurrence of each word (as decided by
 *        SimplifiedLinkedList.contains)
 * @return the words, or null when there are none
 */
public Word[] getWordArray(boolean includeRepeated)
{
    // Step 1: filter the token list down to Word instances.
    final SimplifiedLinkedList words = new SimplifiedLinkedList();
    SimplifiedListIterator it = wordList.listIterator();
    while (it.hasNext())
    {
        final Token current = (Token) it.next();
        if (current instanceof Word)
        {
            words.addLast(current);
        }
    }

    final Word[] result;
    if (includeRepeated)
    {
        final int total = words.size();
        if (total == 0)
        {
            return null;
        }
        result = new Word[total];
        int next = 0;
        for (it = words.listIterator(); it.hasNext(); next++)
        {
            result[next] = (Word) it.next();
        }
    }
    else
    {
        // Step 2: drop words that were already seen.
        final SimplifiedLinkedList unique = new SimplifiedLinkedList();
        for (it = words.listIterator(); it.hasNext(); )
        {
            final Word candidate = (Word) it.next();
            if (unique.contains(candidate))
            {
                continue;
            }
            unique.addLast(candidate);
        }
        int slot = unique.size();
        if (slot == 0)
        {
            return null;
        }
        result = new Word[slot];
        // filled from the last slot towards the first
        for (it = unique.listIterator(); it.hasNext(); )
        {
            slot--;
            result[slot] = (Word) it.next();
        }
    }
    return result;
}
/** Scans one line of text ("linea" is Spanish for "line").
    NOTE(review): presumably the resulting tokens are accumulated in wordList - confirm against the implementations. */
public abstract void scanLine(String linea);
/** Scans a body of text.
    NOTE(review): how this differs from scanLine is not visible here - check the implementations. */
public abstract void scanBody(String linea);
/** Hook for completing a scan.
    NOTE(review): presumably flushes pending input into tokens - confirm against the implementations. */
public abstract void finishUp();
/** Returns the dictionary source object associated with this scanner. */
public abstract BitDictionarySource getDictionarySource();
/** Returns descriptions of the dictionaries.
    NOTE(review): presumably one entry per available dictionary - confirm. */
public abstract String[] getDictionaryDescriptions();
/** Hook for shutting the scanner down.
    NOTE(review): presumably releases resources held by the implementation - confirm. */
public abstract void destroy();
}