Fixed translation tool servlet issues: got rid of title, deleted white space, dealt with UTF8 better, etc.
This commit is contained in:
parent
835e74c0cd
commit
5a0e454a2e
6 changed files with 1574 additions and 1434 deletions
|
@ -46,6 +46,7 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
|
||||||
private static final int WYLIE_TO_ACIP=2;
|
private static final int WYLIE_TO_ACIP=2;
|
||||||
private static final int UNICODE_TO_WYLIE=3;
|
private static final int UNICODE_TO_WYLIE=3;
|
||||||
private static final int WYLIE_TO_UNICODE=4;
|
private static final int WYLIE_TO_UNICODE=4;
|
||||||
|
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
|
||||||
|
|
||||||
/** Converts from the Acip transliteration scheme to EWTS.*/
|
/** Converts from the Acip transliteration scheme to EWTS.*/
|
||||||
public static String acipToWylie(String acip)
|
public static String acipToWylie(String acip)
|
||||||
|
@ -253,6 +254,18 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
|
||||||
return nuevaPalabra;*/
|
return nuevaPalabra;*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int getTibetanUnicodeStart(String unicode, int pos)
|
||||||
|
{
|
||||||
|
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)>=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getTibetanUnicodeEnd(String unicode, int pos)
|
||||||
|
{
|
||||||
|
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)<TIBETAN_UNICODE_RANGE[0] || unicode.codePointAt(pos)>TIBETAN_UNICODE_RANGE[1]) return pos;
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
/** Converts Tibetan Unicode to EWTS. */
|
/** Converts Tibetan Unicode to EWTS. */
|
||||||
public static String unicodeToWylie(String unicode)
|
public static String unicodeToWylie(String unicode)
|
||||||
{
|
{
|
||||||
|
@ -261,9 +274,9 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
|
||||||
TibetanDocument tibDoc;
|
TibetanDocument tibDoc;
|
||||||
StringBuffer errors;
|
StringBuffer errors;
|
||||||
int posStart=0, posEnd;
|
int posStart=0, posEnd;
|
||||||
while((posStart = Manipulate.getTibetanUnicodeStart(unicode, posStart))>=0)
|
while((posStart = getTibetanUnicodeStart(unicode, posStart))>=0)
|
||||||
{
|
{
|
||||||
posEnd = Manipulate.getTibetanUnicodeEnd(unicode, posStart+1);
|
posEnd = getTibetanUnicodeEnd(unicode, posStart+1);
|
||||||
startString = unicode.substring(0, posStart);
|
startString = unicode.substring(0, posStart);
|
||||||
tibetanString = unicode.substring(posStart, posEnd);
|
tibetanString = unicode.substring(posStart, posEnd);
|
||||||
endString = unicode.substring(posEnd);
|
endString = unicode.substring(posEnd);
|
||||||
|
|
|
@ -14,21 +14,22 @@ created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||||
Pellegrini. All Rights Reserved.
|
Pellegrini. All Rights Reserved.
|
||||||
|
|
||||||
Contributor(s): ______________________________________.
|
Contributor(s): ______________________________________.
|
||||||
*/
|
*/
|
||||||
package org.thdl.tib.scanner;
|
package org.thdl.tib.scanner;
|
||||||
|
|
||||||
/** Miscellaneous static methods for the manipulation of Tibetan text.
|
/** Miscellaneous static methods for the manipulation of Tibetan text.
|
||||||
|
|
||||||
@author Andrés Montano Pellegrini
|
@author Andrés Montano Pellegrini
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class Manipulate
|
public class Manipulate
|
||||||
{
|
{
|
||||||
private static String endOfParagraphMarks = "/;|!:^@#$%=";
|
private static String endOfParagraphMarks = "/;|!:^@#$%=,";
|
||||||
private static String bracketMarks = "<>(){}[]";
|
private static String bracketMarks = "<>(){}[]";
|
||||||
private static String endOfSyllableMarks = " _\t";
|
private static String endOfSyllableMarks = " _\t";
|
||||||
private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks;
|
private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks;
|
||||||
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
|
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
|
||||||
|
private static String JSON_ESCAPABLES = "\"\\/";
|
||||||
|
|
||||||
/* public static String[] parseFields (String s, char delimiter)
|
/* public static String[] parseFields (String s, char delimiter)
|
||||||
{
|
{
|
||||||
|
@ -49,53 +50,53 @@ public class Manipulate
|
||||||
|
|
||||||
public static int indexOfAnyChar(String str, String chars)
|
public static int indexOfAnyChar(String str, String chars)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for (i=0; i<str.length(); i++)
|
for (i=0; i<str.length(); i++)
|
||||||
{
|
{
|
||||||
if (chars.indexOf(str.charAt(i))>=0)
|
if (chars.indexOf(str.charAt(i))>=0)
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int indexOfExtendedEndOfSyllableMark(String word)
|
public static int indexOfExtendedEndOfSyllableMark(String word)
|
||||||
{
|
{
|
||||||
return indexOfAnyChar(word, allStopMarkers);
|
return indexOfAnyChar(word, allStopMarkers);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int indexOfBracketMarks(String word)
|
public static int indexOfBracketMarks(String word)
|
||||||
{
|
{
|
||||||
return indexOfAnyChar(word, bracketMarks);
|
return indexOfAnyChar(word, bracketMarks);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isPunctuationMark(int ch)
|
public static boolean isPunctuationMark(int ch)
|
||||||
{
|
{
|
||||||
return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0;
|
return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isEndOfParagraphMark(int ch)
|
public static boolean isEndOfParagraphMark(int ch)
|
||||||
{
|
{
|
||||||
return endOfParagraphMarks.indexOf(ch)>=0;
|
return endOfParagraphMarks.indexOf(ch)>=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isEndOfSyllableMark(int ch)
|
public static boolean isEndOfSyllableMark(int ch)
|
||||||
{
|
{
|
||||||
return endOfSyllableMarks.indexOf(ch)>=0;
|
return endOfSyllableMarks.indexOf(ch)>=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isMeaningful(String s)
|
public static boolean isMeaningful(String s)
|
||||||
{
|
{
|
||||||
for (int i=0; i<s.length(); i++)
|
for (int i=0; i<s.length(); i++)
|
||||||
if (Character.isLetterOrDigit(s.charAt(i))) return true;
|
if (Character.isLetterOrDigit(s.charAt(i))) return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String replace(String linea, String origSub, String newSub)
|
public static String replace(String linea, String origSub, String newSub)
|
||||||
{
|
{
|
||||||
int pos, lenOrig = origSub.length();
|
int pos, lenOrig = origSub.length();
|
||||||
while ((pos = linea.indexOf(origSub))!=-1)
|
while ((pos = linea.indexOf(origSub))!=-1)
|
||||||
{
|
{
|
||||||
linea = linea.substring(0, pos).concat(newSub).concat(linea.substring(pos+lenOrig));
|
linea = linea.substring(0, pos).concat(newSub).concat(linea.substring(pos+lenOrig));
|
||||||
}
|
}
|
||||||
|
@ -104,7 +105,7 @@ public class Manipulate
|
||||||
|
|
||||||
public static String deleteSubstring (String string, int pos, int posEnd)
|
public static String deleteSubstring (String string, int pos, int posEnd)
|
||||||
{
|
{
|
||||||
if (pos<0) return string;
|
if (pos<0) return string;
|
||||||
|
|
||||||
if (pos==0)
|
if (pos==0)
|
||||||
{
|
{
|
||||||
|
@ -113,15 +114,15 @@ public class Manipulate
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (posEnd<string.length())
|
if (posEnd<string.length())
|
||||||
return string.substring(0, pos).concat(string.substring(posEnd)).trim();
|
return string.substring(0, pos).concat(string.substring(posEnd)).trim();
|
||||||
else
|
else
|
||||||
return string.substring(0, pos).trim();
|
return string.substring(0, pos).trim();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String replace(String string, int pos, int posEnd, String newSub)
|
public static String replace(String string, int pos, int posEnd, String newSub)
|
||||||
{
|
{
|
||||||
if (pos<0) return string;
|
if (pos<0) return string;
|
||||||
|
|
||||||
if (pos==0)
|
if (pos==0)
|
||||||
{
|
{
|
||||||
|
@ -130,210 +131,218 @@ public class Manipulate
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (posEnd<string.length())
|
if (posEnd<string.length())
|
||||||
return string.substring(0, pos).concat(newSub).concat(string.substring(posEnd)).trim();
|
return string.substring(0, pos).concat(newSub).concat(string.substring(posEnd)).trim();
|
||||||
else
|
else
|
||||||
return string.substring(0, pos).concat(newSub).trim();
|
return string.substring(0, pos).concat(newSub).trim();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String deleteSubstring (String string, String sub)
|
public static String deleteSubstring (String string, String sub)
|
||||||
{
|
{
|
||||||
int pos = string.indexOf(sub), posEnd = pos + sub.length();
|
int pos = string.indexOf(sub), posEnd = pos + sub.length();
|
||||||
return deleteSubstring(string, pos, posEnd);
|
return deleteSubstring(string, pos, posEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String[] addString(String array[], String s, int n)
|
public static String[] addString(String array[], String s, int n)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
String newArray[] = new String[array.length+1];
|
String newArray[] = new String[array.length+1];
|
||||||
|
|
||||||
for (i=0; i<n; i++)
|
for (i=0; i<n; i++)
|
||||||
newArray[i] = array[i];
|
newArray[i] = array[i];
|
||||||
|
|
||||||
newArray[n] = s;
|
newArray[n] = s;
|
||||||
|
|
||||||
for (i=n+1; i<newArray.length; i++)
|
for (i=n+1; i<newArray.length; i++)
|
||||||
newArray[i] = array[i-1];
|
newArray[i] = array[i-1];
|
||||||
|
|
||||||
return newArray;
|
return newArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String[] deleteString(String array[], int n)
|
public static String[] deleteString(String array[], int n)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
String newArray[] = new String[array.length-1];
|
String newArray[] = new String[array.length-1];
|
||||||
|
|
||||||
for (i=0; i<n; i++)
|
for (i=0; i<n; i++)
|
||||||
newArray[i] = array[i];
|
newArray[i] = array[i];
|
||||||
|
|
||||||
for (i=n; i<newArray.length; i++)
|
for (i=n; i<newArray.length; i++)
|
||||||
newArray[i] = array[i+1];
|
newArray[i] = array[i+1];
|
||||||
|
|
||||||
return newArray;
|
return newArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isVowel (char ch)
|
public static boolean isVowel (char ch)
|
||||||
{
|
{
|
||||||
ch = Character.toLowerCase(ch);
|
ch = Character.toLowerCase(ch);
|
||||||
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
|
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
|
||||||
}
|
}
|
||||||
|
|
||||||
/** If more than half of the letters among the first 10 characters
|
/** If more than half of the letters among the first 10 characters
|
||||||
are uppercase, assume it is ACIP. */
|
are uppercase, assume it is ACIP. */
|
||||||
public static boolean guessIfAcip(String line)
|
public static boolean guessIfAcip(String line)
|
||||||
{
|
|
||||||
char ch;
|
|
||||||
int letters=0, upperCase=0, i, n;
|
|
||||||
n = line.length();
|
|
||||||
if (n>10) n = 10;
|
|
||||||
for (i=0; i<n; i++)
|
|
||||||
{
|
{
|
||||||
ch = line.charAt(i);
|
char ch;
|
||||||
if (Character.isLetter(ch))
|
int letters=0, upperCase=0, i, n;
|
||||||
{
|
n = line.length();
|
||||||
letters++;
|
if (n>10) n = 10;
|
||||||
if (Character.isUpperCase(ch)) upperCase++;
|
for (i=0; i<n; i++)
|
||||||
}
|
{
|
||||||
|
ch = line.charAt(i);
|
||||||
|
if (Character.isLetter(ch))
|
||||||
|
{
|
||||||
|
letters++;
|
||||||
|
if (Character.isUpperCase(ch)) upperCase++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (letters==0 || upperCase==0) return false;
|
||||||
|
else return (letters / upperCase < 2);
|
||||||
}
|
}
|
||||||
if (letters==0 || upperCase==0) return false;
|
|
||||||
else return (letters / upperCase < 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isTibetanUnicodeCharacter(char ch)
|
public static boolean isTibetanUnicodeCharacter(char ch)
|
||||||
{
|
|
||||||
return ch>=0xF00 && ch<=0xFFF;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isTibetanUnicodeLetter(char ch)
|
|
||||||
{
|
|
||||||
|
|
||||||
return ch>=0xF40 && ch<=0xFBC || ch>=0xF00 && ch<=0xF03;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isTibetanUnicodeDigit(char ch)
|
|
||||||
{
|
|
||||||
|
|
||||||
return ch>=0xF20 && ch<=0xF33;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean guessIfUnicode(String line)
|
|
||||||
{
|
|
||||||
char ch;
|
|
||||||
int unicode=0, i, n;
|
|
||||||
n = line.length();
|
|
||||||
if (n>10) n = 10;
|
|
||||||
for (i=0; i<n; i++)
|
|
||||||
{
|
{
|
||||||
ch = line.charAt(i);
|
return ch>=0xF00 && ch<=0xFFF;
|
||||||
if (isTibetanUnicodeCharacter(ch)) unicode++;
|
|
||||||
}
|
}
|
||||||
if (n==0 || unicode==0) return false;
|
|
||||||
else return (n / unicode < 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String fixWazur(String linea)
|
public static boolean isTibetanUnicodeLetter(char ch)
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i=1; i<linea.length(); i++)
|
|
||||||
{
|
{
|
||||||
if (linea.charAt(i)=='W')
|
|
||||||
{
|
return ch>=0xF40 && ch<=0xFBC || ch>=0xF00 && ch<=0xF03;
|
||||||
if (Character.isLetter(linea.charAt(i-1)))
|
|
||||||
linea = linea.substring(0,i) + 'V' + linea.substring(i+1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return linea;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the base letter of a syllable. Does not include the vowel!
|
public static boolean isTibetanUnicodeDigit(char ch)
|
||||||
|
{
|
||||||
|
|
||||||
|
return ch>=0xF20 && ch<=0xF33;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static boolean guessIfUnicode(String line)
|
||||||
|
{
|
||||||
|
char ch;
|
||||||
|
int unicode=0, i, n;
|
||||||
|
n = line.length();
|
||||||
|
if (n>10) n = 10;
|
||||||
|
for (i=0; i<n; i++)
|
||||||
|
{
|
||||||
|
ch = line.charAt(i);
|
||||||
|
if (isTibetanUnicodeCharacter(ch)) unicode++;
|
||||||
|
}
|
||||||
|
if (n==0 || unicode==0) return false;
|
||||||
|
else return (n / unicode < 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String fixWazur(String linea)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i=1; i<linea.length(); i++)
|
||||||
|
{
|
||||||
|
if (linea.charAt(i)=='W')
|
||||||
|
{
|
||||||
|
if (Character.isLetter(linea.charAt(i-1)))
|
||||||
|
linea = linea.substring(0,i) + 'V' + linea.substring(i+1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return linea;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the base letter of a syllable. Does not include the vowel!
|
||||||
Ignoring cases for now. */
|
Ignoring cases for now. */
|
||||||
public static String getBaseLetter (String sil)
|
public static String getBaseLetter (String sil)
|
||||||
{
|
|
||||||
sil = sil.toLowerCase();
|
|
||||||
|
|
||||||
int i=0;
|
|
||||||
char ch, ch2;
|
|
||||||
|
|
||||||
while (!isVowel(sil.charAt(i))) i++;
|
|
||||||
if (i==0) return "";
|
|
||||||
|
|
||||||
i--;
|
|
||||||
if (i==-1) return "";
|
|
||||||
|
|
||||||
if (sil.charAt(i)=='-') i--;
|
|
||||||
|
|
||||||
ch = sil.charAt(i);
|
|
||||||
|
|
||||||
// check to see if it is a subscript (y, r, l, w)
|
|
||||||
if (i>0)
|
|
||||||
{
|
{
|
||||||
switch (ch)
|
sil = sil.toLowerCase();
|
||||||
{
|
|
||||||
case 'r': case 'l': case 'w': i--;
|
|
||||||
break;
|
|
||||||
case 'y':
|
|
||||||
ch2 = sil.charAt(i-1);
|
|
||||||
switch (ch2)
|
|
||||||
{
|
|
||||||
case '.': return "y";
|
|
||||||
case 'n': return "ny";
|
|
||||||
default: i--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (i==0) return sil.substring(i,i+1);
|
|
||||||
ch = sil.charAt(i);
|
|
||||||
ch2 = sil.charAt(i-1);
|
|
||||||
|
|
||||||
switch(ch)
|
int i=0;
|
||||||
|
char ch, ch2;
|
||||||
|
|
||||||
|
while (!isVowel(sil.charAt(i)))
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
if (i>=sil.length()) return null;
|
||||||
|
}
|
||||||
|
if (i==0) return "";
|
||||||
|
|
||||||
|
i--;
|
||||||
|
if (i==-1) return "";
|
||||||
|
|
||||||
|
if (sil.charAt(i)=='-') i--;
|
||||||
|
if (i>0 && sil.charAt(i)=='w') i--;
|
||||||
|
ch = sil.charAt(i);
|
||||||
|
|
||||||
|
// check to see if it is a subscript (y, r, l, w)
|
||||||
|
if (i>0)
|
||||||
|
{
|
||||||
|
switch (ch)
|
||||||
|
{
|
||||||
|
case 'r': case 'l': i--;
|
||||||
|
break;
|
||||||
|
case 'y':
|
||||||
|
ch2 = sil.charAt(i-1);
|
||||||
|
switch (ch2)
|
||||||
|
{
|
||||||
|
case '.': return "y";
|
||||||
|
case 'n': return "ny";
|
||||||
|
default: i--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (sil.charAt(i)=='+') i--;
|
||||||
|
if (i==0) return sil.substring(i,i+1);
|
||||||
|
ch = sil.charAt(i);
|
||||||
|
ch2 = sil.charAt(i-1);
|
||||||
|
|
||||||
|
switch(ch)
|
||||||
|
{
|
||||||
|
case 'h':
|
||||||
|
switch (ch2)
|
||||||
|
{
|
||||||
|
case 'k': case 'c': case 't': case 'p': case 'z':
|
||||||
|
return sil.substring(i-1,i+1);
|
||||||
|
case '+':
|
||||||
|
return sil.substring(i-2, i-1);
|
||||||
|
case 's':
|
||||||
|
if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh";
|
||||||
|
else return "sh";
|
||||||
|
default: return "h";
|
||||||
|
}
|
||||||
|
case 's':
|
||||||
|
if (ch2=='t') return "ts";
|
||||||
|
else return "s";
|
||||||
|
case 'g':
|
||||||
|
if (ch2=='n') return "ng";
|
||||||
|
else return "g";
|
||||||
|
case 'z':
|
||||||
|
if (ch2=='d') return "dz";
|
||||||
|
else return "z";
|
||||||
|
}
|
||||||
|
return sil.substring(i,i+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String deleteQuotes(String s)
|
||||||
{
|
{
|
||||||
case 'h':
|
int length = s.length(), pos;
|
||||||
switch (ch2)
|
if (length>2)
|
||||||
{
|
{
|
||||||
case 'k': case 'c': case 't': case 'p': case 'z':
|
if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"'))
|
||||||
return sil.substring(i-1,i+1);
|
s = s.substring(1,length-1);
|
||||||
case 's':
|
|
||||||
if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh";
|
do
|
||||||
else return "sh";
|
{
|
||||||
default: return "h";
|
pos = s.indexOf("\"\"");
|
||||||
}
|
if (pos<0) break;
|
||||||
case 's':
|
s = Manipulate.deleteSubstring(s, pos, pos+1);
|
||||||
if (ch2=='t') return "ts";
|
} while (true);
|
||||||
else return "s";
|
}
|
||||||
case 'g':
|
|
||||||
if (ch2=='n') return "ng";
|
return s;
|
||||||
else return "g";
|
|
||||||
case 'z':
|
|
||||||
if (ch2=='d') return "dz";
|
|
||||||
else return "z";
|
|
||||||
}
|
}
|
||||||
return sil.substring(i,i+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String deleteQuotes(String s)
|
|
||||||
{
|
|
||||||
int length = s.length(), pos;
|
|
||||||
if (length>2)
|
|
||||||
{
|
|
||||||
if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"'))
|
|
||||||
s = s.substring(1,length-1);
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
pos = s.indexOf("\"\"");
|
|
||||||
if (pos<0) break;
|
|
||||||
s = Manipulate.deleteSubstring(s, pos, pos+1);
|
|
||||||
} while (true);
|
|
||||||
}
|
|
||||||
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries
|
/** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries
|
||||||
|
|
||||||
Takes the output of ConsoleScannerFilter
|
Takes the output of ConsoleScannerFilter
|
||||||
(in RY format), converts the Wylie to Acip
|
(in RY format), converts the Wylie to Acip
|
||||||
|
@ -381,55 +390,84 @@ public class Manipulate
|
||||||
if (psPalabras!=null) psPalabras.flush();
|
if (psPalabras!=null) psPalabras.flush();
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
/** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */
|
/** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */
|
||||||
public static String NCR2UnicodeString(String str)
|
public static String NCR2UnicodeString(String str)
|
||||||
{
|
{
|
||||||
StringBuffer ostr = new StringBuffer();
|
StringBuffer ostr = new StringBuffer();
|
||||||
int i1=0;
|
int i1=0;
|
||||||
int i2=0;
|
int i2=0;
|
||||||
|
|
||||||
while(i2<str.length())
|
while(i2<str.length())
|
||||||
{
|
{
|
||||||
i1 = str.indexOf("&#",i2);
|
i1 = str.indexOf("&#",i2);
|
||||||
if (i1 == -1 ) {
|
if (i1 == -1 ) {
|
||||||
ostr.append(str.substring(i2, str.length()));
|
ostr.append(str.substring(i2, str.length()));
|
||||||
break ;
|
break ;
|
||||||
}
|
}
|
||||||
ostr.append(str.substring(i2, i1));
|
ostr.append(str.substring(i2, i1));
|
||||||
i2 = str.indexOf(";", i1);
|
i2 = str.indexOf(";", i1);
|
||||||
if (i2 == -1 ) {
|
if (i2 == -1 ) {
|
||||||
ostr.append(str.substring(i1, str.length()));
|
ostr.append(str.substring(i1, str.length()));
|
||||||
break ;
|
break ;
|
||||||
}
|
}
|
||||||
|
|
||||||
String tok = str.substring(i1+2, i2);
|
String tok = str.substring(i1+2, i2);
|
||||||
try {
|
try {
|
||||||
int radix = 10 ;
|
int radix = 10 ;
|
||||||
if (tok.trim().charAt(0) == 'x') {
|
if (tok.trim().charAt(0) == 'x') {
|
||||||
radix = 16 ;
|
radix = 16 ;
|
||||||
tok = tok.substring(1,tok.length());
|
tok = tok.substring(1,tok.length());
|
||||||
}
|
}
|
||||||
ostr.append((char) Integer.parseInt(tok, radix));
|
ostr.append((char) Integer.parseInt(tok, radix));
|
||||||
} catch (NumberFormatException exp) {
|
} catch (NumberFormatException exp) {
|
||||||
ostr.append('?') ;
|
ostr.append('?') ;
|
||||||
}
|
}
|
||||||
i2++ ;
|
i2++ ;
|
||||||
}
|
}
|
||||||
return new String(ostr) ;
|
return new String(ostr) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String UnicodeString2NCR(String str)
|
public static String UnicodeString2NCR(String str)
|
||||||
{
|
{
|
||||||
StringBuffer ncr = new StringBuffer();
|
StringBuffer ncr = new StringBuffer();
|
||||||
int i;
|
int i;
|
||||||
for (i=0; i<str.length(); i++)
|
for (i=0; i<str.length(); i++)
|
||||||
{
|
{
|
||||||
ncr.append("&#" + Integer.toString(str.charAt(i)) + ";");
|
ncr.append("&#" + Integer.toString(str.charAt(i)) + ";");
|
||||||
}
|
}
|
||||||
return ncr.toString();
|
return ncr.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String unescape(String s) {
|
public static String toJSON(String str)
|
||||||
|
{
|
||||||
|
int pos, i, len;
|
||||||
|
for (i=0; i<str.length(); i++)
|
||||||
|
{
|
||||||
|
pos = JSON_ESCAPABLES.indexOf(str.charAt(i));
|
||||||
|
if (pos>=0)
|
||||||
|
{
|
||||||
|
len = str.length();
|
||||||
|
str = str.substring(0, i) + "\\" + str.substring(i, len);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
str = replace(str, "\b", "\\b");
|
||||||
|
str = replace(str, "\f", "\\f");
|
||||||
|
str = replace(str, "\n", "\\n");
|
||||||
|
str = replace(str, "\r", "\\r");
|
||||||
|
str = replace(str, "\t", "\\t");
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean containsLetters(String str)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
if (str==null) return false;
|
||||||
|
while (i<str.length()) if (Character.isLetter(str.charAt(i++))) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String unescape(String s) {
|
||||||
int i=0,len=s.length();
|
int i=0,len=s.length();
|
||||||
char c;
|
char c;
|
||||||
StringBuffer sb = new StringBuffer(len);
|
StringBuffer sb = new StringBuffer(len);
|
||||||
|
@ -459,5 +497,4 @@ public class Manipulate
|
||||||
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)<TIBETAN_UNICODE_RANGE[0] || unicode.codePointAt(pos)>TIBETAN_UNICODE_RANGE[1]) return pos;
|
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)<TIBETAN_UNICODE_RANGE[0] || unicode.codePointAt(pos)>TIBETAN_UNICODE_RANGE[1]) return pos;
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
private final static String dictNameProperty = "onlinescannerfilter.dict-file-name";
|
private final static String dictNameProperty = "onlinescannerfilter.dict-file-name";
|
||||||
private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff";
|
private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff";
|
||||||
private final static String moreLinksProperty = "onlinescannerfilter.links-to-more-stuff";
|
private final static String moreLinksProperty = "onlinescannerfilter.links-to-more-stuff";
|
||||||
|
private final static String smallerLinksProperty = "onlinescannerfilter.links-to-smaller-stuff";
|
||||||
private final static String clearStr = "Clear";
|
private final static String clearStr = "Clear";
|
||||||
private final static String buttonStr = "button";
|
private final static String buttonStr = "button";
|
||||||
private final static String scriptStr = "script";
|
private final static String scriptStr = "script";
|
||||||
|
@ -54,6 +55,7 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
|
|
||||||
public OnLineScannerFilter() //throws Exception
|
public OnLineScannerFilter() //throws Exception
|
||||||
{
|
{
|
||||||
|
System.setProperty("java.awt.headless","true");
|
||||||
rb = ResourceBundle.getBundle(propertyFile);
|
rb = ResourceBundle.getBundle(propertyFile);
|
||||||
sl = new ScannerLogger();
|
sl = new ScannerLogger();
|
||||||
|
|
||||||
|
@ -75,7 +77,14 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
HttpServletResponse response) //throws IOException, ServletException
|
HttpServletResponse response) //throws IOException, ServletException
|
||||||
{
|
{
|
||||||
String answer, parrafo = null, checkboxName;
|
String answer, parrafo = null, checkboxName;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
request.setCharacterEncoding("UTF8");
|
||||||
|
}
|
||||||
|
catch(Exception e)
|
||||||
|
{
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
// if this line is included in the constructor, it works on the orion server but not on wyllie!
|
// if this line is included in the constructor, it works on the orion server but not on wyllie!
|
||||||
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
|
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
|
||||||
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
|
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
|
||||||
|
@ -102,15 +111,18 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
|
out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
|
||||||
out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
|
out.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
|
||||||
out.println("<head>");
|
out.println("<head>");
|
||||||
|
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
|
||||||
if (useTHDLBanner)
|
if (useTHDLBanner)
|
||||||
{
|
{
|
||||||
out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>");
|
out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Dictionary and Translation Tool</title>");
|
||||||
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
|
|
||||||
out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
|
out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
|
||||||
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
|
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>");
|
{
|
||||||
|
out.println(" <title>The Online Tibetan to English Dictionary and Translation Tool</title>");
|
||||||
|
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"stylesheets/base.css\"/>");
|
||||||
|
}
|
||||||
|
|
||||||
out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">");
|
out.println(" <meta name=\"keywords\" content=\"tibetan, english, dictionary, jim valby, rangjung yeshe, jeffrey hopkins, tsig mdzod chen mo, online, translation, scanner, parser, buddhism, language, processing, font, dharma, chos, tibet\">");
|
||||||
out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">");
|
out.println(" <meta name=\"Description\" content=\"This Java tool takes Tibetan language passages and divides the passages up into their component phrases and words, and displays corresponding dictionary definitions.\">");
|
||||||
|
@ -158,9 +170,6 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
out.println("</div><!--END sub_banner-->");
|
out.println("</div><!--END sub_banner-->");
|
||||||
out.println("<div id=\"main\">");
|
out.println("<div id=\"main\">");
|
||||||
}
|
}
|
||||||
|
|
||||||
out.println("<h3 align=\"center\">The Online Tibetan to English Translation/Dictionary Tool</h3>");
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
out.println(rb.getString(otherLinksProperty));
|
out.println(rb.getString(otherLinksProperty));
|
||||||
|
@ -180,18 +189,18 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
}
|
}
|
||||||
out.println("<table border=\"0\" width=\"100%\">");
|
out.println("<table border=\"0\" width=\"100%\">");
|
||||||
out.println(" <tr>");
|
out.println(" <tr>");
|
||||||
out.println(" <td width=\"25%\">");
|
out.println(" <td width=\"18%\" align=\"left\"><strong>Display results in:</strong></td>");
|
||||||
out.println(" <p>Display results in:</td>");
|
out.println(" <td width=\"41%\" align=\"right\">");
|
||||||
out.println(" <td width=\"75%\">");
|
out.println(" <input type=\"radio\" value=\"" + tibetanStr + "\" ");
|
||||||
out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" ");
|
|
||||||
if (wantsTibetan) out.println("checked ");
|
if (wantsTibetan) out.println("checked ");
|
||||||
out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://www.thdl.org/xml/show.php?xml=/tools/tibfonts.xml&l=uva10928423419921\" target=\"_blank\">Tibetan Machine Uni font</a>)<br/>");
|
out.println("name=\"" + scriptStr + "\">Tibetan script (<a href=\"http://www.thlib.org/tools/#wiki=/access/wiki/site/26a34146-33a6-48ce-001e-f16ce7908a6a/tibetan%20machine%20uni.html\" target=\"_top\">Tibetan Machine Uni</a> font)</td>");
|
||||||
|
out.println(" <td width=\"16%\" align=\"left\">");
|
||||||
out.println(" <input type=\"radio\" value=\"roman\" ");
|
out.println(" <input type=\"radio\" value=\"roman\" ");
|
||||||
if (!wantsTibetan) out.println("checked ");
|
if (!wantsTibetan) out.println("checked ");
|
||||||
out.println("name=\"" + scriptStr + "\">Roman script</td>");
|
out.println("name=\"" + scriptStr + "\">Roman script</td>");
|
||||||
|
out.println(" <td width=\"25%\" align=\"right\">");
|
||||||
|
out.println("<a href=\"http://www.thlib.org/tools/#wiki=/access/wiki/site/c06fa8cf-c49c-4ebc-007f-482de5382105/tibetan%20translation%20tool.html\" target=\"_top\">Help & Offline Installation</a></td>");
|
||||||
out.println(" </tr>");
|
out.println(" </tr>");
|
||||||
out.println("</table>");
|
|
||||||
|
|
||||||
if (dictionaries!=null)
|
if (dictionaries!=null)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -200,7 +209,7 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
checkedDicts = new boolean[dictionaries.length];
|
checkedDicts = new boolean[dictionaries.length];
|
||||||
/* out.println(" <tr>");
|
/* out.println(" <tr>");
|
||||||
out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/
|
out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/
|
||||||
out.println("<p>Search in dictionaries: ");
|
out.println("<tr><td colspan=\"4\"><strong>Search in dictionaries: </strong>");
|
||||||
allUnchecked=true;
|
allUnchecked=true;
|
||||||
for (i=0; i<dictionaries.length; i++)
|
for (i=0; i<dictionaries.length; i++)
|
||||||
{
|
{
|
||||||
|
@ -239,22 +248,43 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
out.print(">" + DictionarySource.defTags[i] + " ");
|
out.print(">" + DictionarySource.defTags[i] + " ");
|
||||||
// out.println(" + "</td>");
|
// out.println(" + "</td>");
|
||||||
}
|
}
|
||||||
// out.println(" </tr>");
|
out.println(" </td></tr>");
|
||||||
}
|
}
|
||||||
// fix for updates
|
// fix for updates
|
||||||
else ds = BitDictionarySource.getAllDictionaries();
|
else ds = BitDictionarySource.getAllDictionaries();
|
||||||
// out.println("</table>");
|
// out.println("</table>");
|
||||||
out.println("</p>");
|
// out.println("</p>");
|
||||||
out.println("<table border=\"0\" width=\"100%\">");
|
// out.println("<table border=\"0\" width=\"100%\">");
|
||||||
out.println(" <tr>");
|
out.println(" <tr>");
|
||||||
out.println(" <td width=\"35%\">");
|
out.println(" <td><strong>Input text:</strong></td>");
|
||||||
out.println(" <p><strong>Input text:</strong></p>");
|
out.println(" <td><input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></td>");
|
||||||
out.println(" </td>");
|
out.println(" <td colspan\"2\"> </td");
|
||||||
out.println(" <td width=\"65%\">");
|
|
||||||
out.println(" <p> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"Translate\"> <input type=\"submit\" name=\"" + buttonStr + "\" value=\"" + clearStr + "\"></p>");
|
|
||||||
out.println(" </td>");
|
|
||||||
out.println(" </tr>");
|
out.println(" </tr>");
|
||||||
out.println("</table>");
|
out.println("</table>");
|
||||||
|
answer = request.getParameter(buttonStr);
|
||||||
|
String smallerLinks=null;
|
||||||
|
if (answer == null || answer != null && !answer.equals(clearStr))
|
||||||
|
{
|
||||||
|
parrafo = request.getParameter("parrafo");
|
||||||
|
}
|
||||||
|
if (parrafo==null)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
smallerLinks = rb.getString(smallerLinksProperty);
|
||||||
|
}
|
||||||
|
catch (MissingResourceException e)
|
||||||
|
{
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if (smallerLinks!=null)
|
||||||
|
{
|
||||||
|
out.println("<table width=\"100%\">");
|
||||||
|
out.println("<tr>");
|
||||||
|
out.println("<td>");
|
||||||
|
}
|
||||||
|
|
||||||
out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
|
out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
|
||||||
if (wantsTibetan) out.print(" class=\"tib\"");
|
if (wantsTibetan) out.print(" class=\"tib\"");
|
||||||
|
@ -262,28 +292,30 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
|
|
||||||
// Paragraph should be empty if the user just clicked the clear button
|
// Paragraph should be empty if the user just clicked the clear button
|
||||||
answer = request.getParameter(buttonStr);
|
answer = request.getParameter(buttonStr);
|
||||||
if (answer == null || answer != null && !answer.equals(clearStr))
|
if (parrafo!=null)
|
||||||
{
|
{
|
||||||
parrafo = request.getParameter("parrafo");
|
out.print(parrafo);
|
||||||
if (parrafo!=null) out.print(parrafo);
|
}
|
||||||
|
out.println("</textarea>");
|
||||||
|
if (smallerLinks!=null)
|
||||||
|
{
|
||||||
|
out.println("</td>");
|
||||||
|
out.println("<td>");
|
||||||
|
out.println(smallerLinks);
|
||||||
|
out.println("</td>");
|
||||||
|
out.println("</tr>");
|
||||||
|
out.println("</table>");
|
||||||
}
|
}
|
||||||
|
|
||||||
out.println("</textarea>");
|
|
||||||
out.println("</form>");
|
out.println("</form>");
|
||||||
try
|
|
||||||
{
|
|
||||||
out.println(rb.getString(moreLinksProperty));
|
|
||||||
}
|
|
||||||
catch (MissingResourceException e)
|
|
||||||
{
|
|
||||||
// do nothing
|
|
||||||
}
|
|
||||||
|
|
||||||
if (parrafo != null)
|
if (parrafo != null)
|
||||||
{
|
{
|
||||||
sl.writeLog("4\t1");
|
sl.writeLog("4\t1");
|
||||||
if (ds!=null && !ds.isEmpty())
|
if (ds!=null && !ds.isEmpty())
|
||||||
|
{
|
||||||
desglosar(parrafo, out, wantsTibetan);
|
desglosar(parrafo, out, wantsTibetan);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else sl.writeLog("3\t1");
|
else sl.writeLog("3\t1");
|
||||||
|
|
||||||
|
@ -304,7 +336,7 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
{
|
{
|
||||||
//boolean hayMasLineas=true;
|
//boolean hayMasLineas=true;
|
||||||
//int init = 0, fin;
|
//int init = 0, fin;
|
||||||
//String linea;
|
String tmp;
|
||||||
Object words[];
|
Object words[];
|
||||||
|
|
||||||
if (!in.equals(""))
|
if (!in.equals(""))
|
||||||
|
@ -331,6 +363,17 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
scanner.scanBody(in);
|
scanner.scanBody(in);
|
||||||
scanner.finishUp();
|
scanner.finishUp();
|
||||||
printText(pw, tibetan);
|
printText(pw, tibetan);
|
||||||
|
try
|
||||||
|
{
|
||||||
|
tmp = rb.getString(moreLinksProperty);
|
||||||
|
pw.println("<p>");
|
||||||
|
pw.println(tmp);
|
||||||
|
pw.println("</p>");
|
||||||
|
}
|
||||||
|
catch (MissingResourceException e)
|
||||||
|
{
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
printAllDefs(pw, tibetan);
|
printAllDefs(pw, tibetan);
|
||||||
scanner.clearTokens();
|
scanner.clearTokens();
|
||||||
}
|
}
|
||||||
|
@ -393,8 +436,7 @@ public class OnLineScannerFilter extends HttpServlet
|
||||||
|
|
||||||
words = scanner.getWordArray(false);
|
words = scanner.getWordArray(false);
|
||||||
|
|
||||||
if (words == null)
|
if (words == null) return;
|
||||||
return;
|
|
||||||
pw.println("<table border=\"1\" width=\"100%\">");
|
pw.println("<table border=\"1\" width=\"100%\">");
|
||||||
|
|
||||||
for (j = 0; j < words.length; j++) {
|
for (j = 0; j < words.length; j++) {
|
||||||
|
|
|
@ -14,7 +14,7 @@ created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||||
Pellegrini. All Rights Reserved.
|
Pellegrini. All Rights Reserved.
|
||||||
|
|
||||||
Contributor(s): ______________________________________.
|
Contributor(s): ______________________________________.
|
||||||
*/
|
*/
|
||||||
package org.thdl.tib.scanner;
|
package org.thdl.tib.scanner;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
|
@ -32,59 +32,82 @@ import javax.servlet.ServletResponse;
|
||||||
|
|
||||||
@author Andrés Montano Pellegrini
|
@author Andrés Montano Pellegrini
|
||||||
@see RemoteTibetanScanner
|
@see RemoteTibetanScanner
|
||||||
*/
|
*/
|
||||||
public class RemoteScannerFilter extends GenericServlet
|
public class RemoteScannerFilter extends GenericServlet
|
||||||
{
|
{
|
||||||
private TibetanScanner scanner;
|
private TibetanScanner scanner;
|
||||||
private BitDictionarySource ds;
|
private BitDictionarySource ds;
|
||||||
private ScannerLogger sl;
|
private ScannerLogger sl;
|
||||||
|
private static final int INTERNAL = 1;
|
||||||
|
private static final int JSON = 2;
|
||||||
|
|
||||||
public RemoteScannerFilter()
|
public RemoteScannerFilter()
|
||||||
{
|
{
|
||||||
|
System.setProperty("java.awt.headless","true");
|
||||||
ResourceBundle rb = ResourceBundle.getBundle("dictionary");
|
ResourceBundle rb = ResourceBundle.getBundle("dictionary");
|
||||||
sl = new ScannerLogger();
|
sl = new ScannerLogger();
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
scanner = new LocalTibetanScanner(rb.getString("onlinescannerfilter.dict-file-name"),false);
|
scanner = new LocalTibetanScanner(rb.getString("onlinescannerfilter.dict-file-name"),false);
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
{
|
{
|
||||||
sl.writeLog("1\t2");
|
sl.writeLog("1\t2");
|
||||||
sl.writeException(e);
|
sl.writeException(e);
|
||||||
}
|
}
|
||||||
|
scanner.getDictionaryDescriptions();
|
||||||
ds = scanner.getDictionarySource();
|
ds = scanner.getDictionarySource();
|
||||||
sl.writeLog("Creation\t2");
|
sl.writeLog("Creation\t2");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void service(ServletRequest req, ServletResponse res) //throws ServletException, IOException
|
public void service(ServletRequest req, ServletResponse res) //throws ServletException, IOException
|
||||||
{
|
{
|
||||||
BufferedReader br;
|
BufferedReader br;
|
||||||
res.setContentType ("text/plain");
|
int format, i, j, k;
|
||||||
sl.setUserIP(req.getRemoteAddr());
|
try
|
||||||
|
{
|
||||||
|
req.setCharacterEncoding("UTF8");
|
||||||
|
}
|
||||||
|
catch(Exception e)
|
||||||
|
{
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
|
String linea, dicts = req.getParameter("dicts"), dicDescrip[], jwf = req.getParameter("jwf"), tag;
|
||||||
|
Definitions defs;
|
||||||
|
ByteDictionarySource dict_source;
|
||||||
|
if (jwf!=null) format = JSON;
|
||||||
|
else format = INTERNAL;
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case INTERNAL:
|
||||||
|
res.setContentType ("text/plain");
|
||||||
|
break;
|
||||||
|
case JSON:
|
||||||
|
res.setContentType ("text/x-json");
|
||||||
|
}
|
||||||
|
sl.setUserIP(req.getRemoteAddr());
|
||||||
|
|
||||||
Word word = null, words[] = null;
|
Word word = null, words[] = null;
|
||||||
PrintWriter out;
|
PrintWriter out;
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
out = res.getWriter();
|
out = res.getWriter();
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
{
|
{
|
||||||
sl.writeLog("1\t2");
|
sl.writeLog("1\t2");
|
||||||
sl.writeException(e);
|
sl.writeException(e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int i;
|
|
||||||
String linea, dicts = req.getParameter("dicts"), dicDescrip[];
|
|
||||||
|
|
||||||
if (dicts!=null)
|
if (dicts!=null)
|
||||||
{
|
{
|
||||||
if (dicts.equals("names"))
|
if (dicts.equals("names"))
|
||||||
{
|
{
|
||||||
sl.writeLog("3\t2");
|
sl.writeLog("3\t2");
|
||||||
dicDescrip = scanner.getDictionaryDescriptions();
|
dicDescrip = scanner.getDictionaryDescriptions();
|
||||||
if (dicDescrip==null)
|
if (dicDescrip==null)
|
||||||
{
|
{
|
||||||
|
@ -98,65 +121,86 @@ public class RemoteScannerFilter extends GenericServlet
|
||||||
}
|
}
|
||||||
out.close();
|
out.close();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ds.setDicts(Integer.parseInt(dicts));
|
ds.setDicts(Integer.parseInt(dicts));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
br = new BufferedReader(new InputStreamReader(req.getInputStream()));
|
|
||||||
}
|
|
||||||
catch (Exception e)
|
|
||||||
{
|
|
||||||
sl.writeLog("1\t2");
|
|
||||||
sl.writeException(e);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (format==JSON)
|
||||||
|
{
|
||||||
|
out.println(jwf + "({\"words\":{");
|
||||||
|
}
|
||||||
|
try
|
||||||
|
{
|
||||||
|
scanner.clearTokens();
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case INTERNAL:
|
||||||
|
br = req.getReader();
|
||||||
|
sl.writeLog("4\t2");
|
||||||
|
while((linea = br.readLine())!= null)
|
||||||
|
scanner.scanLine(linea);
|
||||||
|
br.close();
|
||||||
|
break;
|
||||||
|
case JSON:
|
||||||
|
linea = req.getParameter("text");
|
||||||
|
linea = Manipulate.NCR2UnicodeString(linea);
|
||||||
|
if (Manipulate.guessIfUnicode(linea)) linea = BasicTibetanTranscriptionConverter.unicodeToWylie(linea);
|
||||||
|
else if (Manipulate.guessIfAcip(linea)) linea = BasicTibetanTranscriptionConverter.acipToWylie(linea);
|
||||||
|
scanner.scanLine(linea);
|
||||||
|
}
|
||||||
|
scanner.finishUp();
|
||||||
|
words = scanner.getWordArray();
|
||||||
|
|
||||||
/* FIXME: sometimes getDef raises a NullPointerException.
|
for (i=0; i<words.length; i++)
|
||||||
In the meantime, I'll just keep it from crashing
|
{
|
||||||
*/
|
linea = words[i].getDef();
|
||||||
sl.writeLog("4\t2");
|
if (linea == null) continue;
|
||||||
|
switch (format)
|
||||||
try
|
{
|
||||||
{
|
case INTERNAL:
|
||||||
scanner.clearTokens();
|
out.println(words[i].getWylie());
|
||||||
while((linea = br.readLine())!= null)
|
out.println(linea);
|
||||||
scanner.scanLine(linea);
|
out.println();
|
||||||
|
break;
|
||||||
br.close();
|
case JSON:
|
||||||
|
out.println("\"" + BasicTibetanTranscriptionConverter.wylieToHTMLUnicode(words[i].token) + "\": [");
|
||||||
scanner.finishUp();
|
defs = words[i].getDefs();
|
||||||
words = scanner.getWordArray();
|
dict_source = (ByteDictionarySource)defs.getDictionarySource();
|
||||||
|
k=0;
|
||||||
for (i=0; i<words.length; i++)
|
for (j=0; j<defs.def.length; j++)
|
||||||
{
|
{
|
||||||
linea = words[i].getDef();
|
while (dict_source.isEmpty(k)) k++;
|
||||||
if (linea == null) continue;
|
tag = dict_source.getTag(k);
|
||||||
out.println(words[i].getWylie());
|
k++;
|
||||||
out.println(linea);
|
out.println("\"" + tag + "\",");
|
||||||
out.println();
|
out.print("\"" + Manipulate.toJSON(defs.def[j]) + "\"");
|
||||||
}
|
if (j==defs.def.length-1) out.println();
|
||||||
|
else out.println(",");
|
||||||
|
}
|
||||||
|
out.print("]");
|
||||||
|
if (i<words.length-1) out.println(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (format==JSON) out.println("}});");
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
{
|
{
|
||||||
sl.writeLog("1\t2\t" + word.getWylie());
|
sl.writeLog("1\t2\t" + word.getWylie());
|
||||||
sl.writeException(e);
|
sl.writeException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
scanner.clearTokens();
|
scanner.clearTokens();
|
||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void destroy()
|
public void destroy()
|
||||||
{
|
{
|
||||||
super.destroy();
|
super.destroy();
|
||||||
sl.setUserIP(null);
|
sl.setUserIP(null);
|
||||||
sl.writeLog("5\t2");
|
sl.writeLog("5\t2");
|
||||||
scanner.destroy();
|
scanner.destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -14,7 +14,7 @@ created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||||
Pellegrini. All Rights Reserved.
|
Pellegrini. All Rights Reserved.
|
||||||
|
|
||||||
Contributor(s): ______________________________________.
|
Contributor(s): ______________________________________.
|
||||||
*/
|
*/
|
||||||
package org.thdl.tib.scanner;
|
package org.thdl.tib.scanner;
|
||||||
|
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
|
@ -26,61 +26,68 @@ import java.util.ResourceBundle;
|
||||||
servlet version of the translation tool.
|
servlet version of the translation tool.
|
||||||
|
|
||||||
@author Andrés Montano Pellegrini
|
@author Andrés Montano Pellegrini
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ScannerLogger
{
    /** Path of the log file; every entry is appended to it. */
    private String fileName;

    /** Client address written at the start of each entry, or null for none. */
    private String lastIP;

    /** When false, writeLog and writeException do nothing. */
    private boolean enabled;

    /**
     * Reads the logger configuration from the <code>dictionary</code>
     * resource bundle. The log file name comes from
     * <code>remotescannerfilter.log-file-name</code>. Logging is enabled only
     * when <code>remotescannerfilter.logging-enabled</code> is present and
     * equals "yes" (case-insensitive).
     */
    public ScannerLogger()
    {
        ResourceBundle rb = ResourceBundle.getBundle("dictionary");
        fileName = rb.getString("remotescannerfilter.log-file-name");

        enabled = false;
        try
        {
            // getString never returns null: a missing key is reported with a
            // MissingResourceException, so that is what must be handled here.
            enabled = "yes".equalsIgnoreCase(rb.getString("remotescannerfilter.logging-enabled"));
        }
        catch (java.util.MissingResourceException e)
        {
            // Key not configured: logging stays disabled.
        }
        lastIP = null;
    }

    /**
     * Returns the current local time as six tab-separated integers: year,
     * month, day of month, hour of day, minute and second.
     * The month is the raw Calendar.MONTH value, which is zero-based
     * (January is 0).
     *
     * @return the tab-separated time stamp
     */
    public String getCurrentTime()
    {
        Calendar rightNow = Calendar.getInstance();
        return Integer.toString(rightNow.get(Calendar.YEAR)) + "\t"
            + Integer.toString(rightNow.get(Calendar.MONTH)) + "\t"
            + Integer.toString(rightNow.get(Calendar.DAY_OF_MONTH)) + "\t"
            + Integer.toString(rightNow.get(Calendar.HOUR_OF_DAY)) + "\t"
            + Integer.toString(rightNow.get(Calendar.MINUTE)) + "\t"
            + Integer.toString(rightNow.get(Calendar.SECOND));
    }

    /**
     * Sets the client address that later log entries start with.
     *
     * @param lastIP the address, or null to write "-" instead
     */
    public void setUserIP(String lastIP)
    {
        this.lastIP = lastIP;
    }

    /**
     * Appends one line to the log file: the client address (or "-"), the
     * current time and the given text, separated by tabs. Does nothing when
     * logging is disabled or the log file cannot be opened.
     *
     * @param s the text of the entry
     */
    public synchronized void writeLog(String s)
    {
        if (!enabled) return;

        PrintStream pw = getPrintStream();
        // getPrintStream has already reported the failure; a logging problem
        // must not turn into a NullPointerException in the caller.
        if (pw == null) return;

        if (lastIP!=null) pw.print(lastIP);
        else pw.print("-");
        pw.println("\t" + getCurrentTime() + "\t" + s);
        pw.flush();
        pw.close();
    }

    /**
     * Opens the log file for appending.
     *
     * @return a stream on the log file, or null if it could not be opened
     *         (the failure is printed to standard error)
     */
    private PrintStream getPrintStream()
    {
        PrintStream pw;
        try
        {
            pw = new PrintStream(new FileOutputStream(fileName, true));
            return pw;
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Appends the stack trace of the given exception to the log file. Does
     * nothing when logging is disabled or the log file cannot be opened.
     *
     * @param e the exception to record
     */
    public synchronized void writeException(Exception e)
    {
        if (!enabled) return;

        PrintStream pw = getPrintStream();
        if (pw == null) return;

        e.printStackTrace(pw);
        pw.flush();
        pw.close();
    }
}
|
|
@ -14,7 +14,7 @@ created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||||
Pellegrini. All Rights Reserved.
|
Pellegrini. All Rights Reserved.
|
||||||
|
|
||||||
Contributor(s): ______________________________________.
|
Contributor(s): ______________________________________.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.thdl.tib.scanner;
|
package org.thdl.tib.scanner;
|
||||||
import org.thdl.util.SimplifiedLinkedList;
|
import org.thdl.util.SimplifiedLinkedList;
|
||||||
|
@ -24,13 +24,13 @@ import org.thdl.util.ThdlVersion;
|
||||||
/** Defines the core methods required to provide access to a dictionary, local or remote.
|
/** Defines the core methods required to provide access to a dictionary, local or remote.
|
||||||
|
|
||||||
@author Andrés Montano Pellegrini
|
@author Andrés Montano Pellegrini
|
||||||
*/
|
*/
|
||||||
public abstract class TibetanScanner
|
public abstract class TibetanScanner
|
||||||
{
|
{
|
||||||
public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
|
public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
|
||||||
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-200??6 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
|
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2009 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
|
||||||
public static final String copyrightASCII="Copyright 2000-2006 by Andres Montano Pellegrini, all rights reserved.";
|
public static final String copyrightASCII="Copyright 2000-2009 by Andres Montano Pellegrini, all rights reserved.";
|
||||||
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright © 2000-2006 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andrés Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";
|
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright © 2000-2009 by <a href=\"http://www.gaugeus.com/ramblings\" target=\"_blank\">Andrés Montano Pellegrini.</a> All rights reserved.</strong></small>";
|
||||||
|
|
||||||
public static final int NORMAL_MODE=1;
|
public static final int NORMAL_MODE=1;
|
||||||
public static final int DEBUG_MODE=2;
|
public static final int DEBUG_MODE=2;
|
||||||
|
@ -42,123 +42,123 @@ public abstract class TibetanScanner
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String aboutTomeraider=
|
public static final String aboutTomeraider=
|
||||||
"Welcome to Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary version 2.0.0!<p>\n" +
|
"Welcome to Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary version 2.0.0!<p>\n" +
|
||||||
"This file was automatically generated using software developed by Andres Montano Pellegrini. " +
|
"This file was automatically generated using software developed by Andres Montano Pellegrini. " +
|
||||||
"For more information, see http://www.people.virginia.edu/~am2zb/tibetan .<p>" +
|
"For more information, see http://www.people.virginia.edu/~am2zb/tibetan .<p>" +
|
||||||
"<b>Formulator and Editor</b>: Jeffrey Hopkins<br>\n" +
|
"<b>Formulator and Editor</b>: Jeffrey Hopkins<br>\n" +
|
||||||
"<b>Contributors</b>: Joe Wilson, Craig Preston, John Powers, Nathanial Garson, " +
|
"<b>Contributors</b>: Joe Wilson, Craig Preston, John Powers, Nathanial Garson, " +
|
||||||
"Paul Hackett, Andres Montano<p>" +
|
"Paul Hackett, Andres Montano<p>" +
|
||||||
"A project of the Tibetan Studies Institute, Boonesville, Virginia, and the " +
|
"A project of the Tibetan Studies Institute, Boonesville, Virginia, and the " +
|
||||||
"University of Virginia Tibetan Studies Program<p>" +
|
"University of Virginia Tibetan Studies Program<p>" +
|
||||||
"<i>\u00A9 Jeffrey Hopkins 1992.</i><p>" +
|
"<i>\u00A9 Jeffrey Hopkins 1992.</i><p>" +
|
||||||
"<b>Apology</b><p>" +
|
"<b>Apology</b><p>" +
|
||||||
"This is a work in progress in crude form that is being shared with students " +
|
"This is a work in progress in crude form that is being shared with students " +
|
||||||
"of the Tibetan language mainly in order to receive input for further " +
|
"of the Tibetan language mainly in order to receive input for further " +
|
||||||
"development. The English translations of the entries can be said only to " +
|
"development. The English translations of the entries can be said only to " +
|
||||||
"represent what contributors, over a span of over thirty years, thought were " +
|
"represent what contributors, over a span of over thirty years, thought were " +
|
||||||
"my current translations. A small number are simply wrong; others need to be " +
|
"my current translations. A small number are simply wrong; others need to be " +
|
||||||
"updated; and all will receive much more attention and, hence, detail.<p>\n" +
|
"updated; and all will receive much more attention and, hence, detail.<p>\n" +
|
||||||
"The Dictionary has been entered into a database with fields for the entry, " +
|
"The Dictionary has been entered into a database with fields for the entry, " +
|
||||||
"Sanskrit, tenses, my English, a few others’ interests, examples, " +
|
"Sanskrit, tenses, my English, a few others’ interests, examples, " +
|
||||||
"definition, divisions, and comments. At this point, very few entries " +
|
"definition, divisions, and comments. At this point, very few entries " +
|
||||||
"contain all of these items, but the plan is provide these, where " +
|
"contain all of these items, but the plan is provide these, where " +
|
||||||
"appropriate, over the years. Translations for entries that have arisen from " +
|
"appropriate, over the years. Translations for entries that have arisen from " +
|
||||||
"my work and from interactions with my students are in boldface, whereas " +
|
"my work and from interactions with my students are in boldface, whereas " +
|
||||||
"those from other works are in regular type on separate lines and are marked " +
|
"those from other works are in regular type on separate lines and are marked " +
|
||||||
"with an initial at the end of the line. A key to these markings is given on " +
|
"with an initial at the end of the line. A key to these markings is given on " +
|
||||||
"the next page.<p>\n" +
|
"the next page.<p>\n" +
|
||||||
"(Please note that the radical signs for Sanskrit roots are, after the first" +
|
"(Please note that the radical signs for Sanskrit roots are, after the first" +
|
||||||
"letter of the alphabet, in a state of disarray.)<p>\n" +
|
"letter of the alphabet, in a state of disarray.)<p>\n" +
|
||||||
"I hope that you will bear with the many inadequacies of this first release.<p>\n" +
|
"I hope that you will bear with the many inadequacies of this first release.<p>\n" +
|
||||||
"Paul Jeffrey Hopkins<br>\n" +
|
"Paul Jeffrey Hopkins<br>\n" +
|
||||||
"Professor of Tibetan Studies<p>\n" +
|
"Professor of Tibetan Studies<p>\n" +
|
||||||
"<b>Abbreviations</b><p>\n" +
|
"<b>Abbreviations</b><p>\n" +
|
||||||
"B-7: ??? {PH: see dngos med ... & dngos po (synonyms) }<p>\n" +
|
"B-7: ??? {PH: see dngos med ... & dngos po (synonyms) }<p>\n" +
|
||||||
"BJ: Bel-jor-hlun-drup (Dpal \'byor lhun grub). Legs bshad snying po\'i dka' " +
|
"BJ: Bel-jor-hlun-drup (Dpal \'byor lhun grub). Legs bshad snying po\'i dka' " +
|
||||||
"\'grel bstan pa\'i sgron me (Buxaduar: Sera Monastery, 1968).<p>\n" +
|
"\'grel bstan pa\'i sgron me (Buxaduar: Sera Monastery, 1968).<p>\n" +
|
||||||
"BK: ??? {PH: see bka\' (examples) }<p>\n" +
|
"BK: ??? {PH: see bka\' (examples) }<p>\n" +
|
||||||
"BR: Losang Gyatso (Blo bzang rgya mtsho). Presentation of Knowledge and " +
|
"BR: Losang Gyatso (Blo bzang rgya mtsho). Presentation of Knowledge and " +
|
||||||
"Awareness (Blo rig).<p>\n" +
|
"Awareness (Blo rig).<p>\n" +
|
||||||
"BWT: Ngak-wang-bel-den (Ngag dbang dpal ldan). Annotations for " +
|
"BWT: Ngak-wang-bel-den (Ngag dbang dpal ldan). Annotations for " +
|
||||||
"[Jam-yang-shay-ba\'s] \"Tenets\" (Grub mtha\' chen mo\'i mchan).<p>\n" +
|
"[Jam-yang-shay-ba\'s] \"Tenets\" (Grub mtha\' chen mo\'i mchan).<p>\n" +
|
||||||
"C: Conze, Edward. Materials for a Dictionary of the Prajnaparamita " +
|
"C: Conze, Edward. Materials for a Dictionary of the Prajnaparamita " +
|
||||||
"Literature (Tokyo: Suzuki Research Foundation, 1967).<p>\n" +
|
"Literature (Tokyo: Suzuki Research Foundation, 1967).<p>\n" +
|
||||||
"col.: colloquial<p>\n" +
|
"col.: colloquial<p>\n" +
|
||||||
"D1: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
|
"D1: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
|
||||||
"(Part 1: Bsdus grwa chung ngu).<p>\n" +
|
"(Part 1: Bsdus grwa chung ngu).<p>\n" +
|
||||||
"D2: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
|
"D2: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " +
|
||||||
"(Part 2: Bsdus grwa \'bring).<p>\n" +
|
"(Part 2: Bsdus grwa \'bring).<p>\n" +
|
||||||
"DASI: Decisive Analysis of Special Insight.<p>\n" +
|
"DASI: Decisive Analysis of Special Insight.<p>\n" +
|
||||||
"DG: Germano, David. Poetic Thought, the Intelligent Universe, and the " +
|
"DG: Germano, David. Poetic Thought, the Intelligent Universe, and the " +
|
||||||
"Mystery of Self: the Tantric Synthesis of rDzogs Chen in Fourteenth Century " +
|
"Mystery of Self: the Tantric Synthesis of rDzogs Chen in Fourteenth Century " +
|
||||||
"Tibet. (Ph.d. dissertation, University of Wisconsin, Madison,WI 1992).<p>\n" +
|
"Tibet. (Ph.d. dissertation, University of Wisconsin, Madison,WI 1992).<p>\n" +
|
||||||
"DK: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Drang ba dang nges pa\'i " +
|
"DK: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Drang ba dang nges pa\'i " +
|
||||||
"don rnam par phye ba'i bstan bcos legs bshad snying po (Sarnath: Pleasure of " +
|
"don rnam par phye ba'i bstan bcos legs bshad snying po (Sarnath: Pleasure of " +
|
||||||
"Elegant Sayings Press, 1979).<p>\n" +
|
"Elegant Sayings Press, 1979).<p>\n" +
|
||||||
"Ganden Triba: Oral commentary of Ganden Triba Jam-bel-shen-pen.<p>\n" +
|
"Ganden Triba: Oral commentary of Ganden Triba Jam-bel-shen-pen.<p>\n" +
|
||||||
"GCT: Ngak-wang-dra-shi (Ngag dbang bkra shis). Collected Topics by a " +
|
"GCT: Ngak-wang-dra-shi (Ngag dbang bkra shis). Collected Topics by a " +
|
||||||
"Spiritual Son of Jam-yang-shay-ba (Sgo mang sras bsdus grwa).<p>\n" +
|
"Spiritual Son of Jam-yang-shay-ba (Sgo mang sras bsdus grwa).<p>\n" +
|
||||||
"GD: Dreyfus, George. Ontology, Philosophy of Language, and Epistemology in " +
|
"GD: Dreyfus, George. Ontology, Philosophy of Language, and Epistemology in " +
|
||||||
"Buddhist Tradition (Ph.d. dissertation. Religious Studies, University of " +
|
"Buddhist Tradition (Ph.d. dissertation. Religious Studies, University of " +
|
||||||
"Virginia, Charlottesville,VA 1991).<p>\n" +
|
"Virginia, Charlottesville,VA 1991).<p>\n" +
|
||||||
"Gon-chok: Gon-chok-jik-may-wang-bo (Dkon mchog \'jigs med dbang po). " +
|
"Gon-chok: Gon-chok-jik-may-wang-bo (Dkon mchog \'jigs med dbang po). " +
|
||||||
"Precious Garland of Tenets (Grub mtha\' rin chen phreng ba).<p>\n" +
|
"Precious Garland of Tenets (Grub mtha\' rin chen phreng ba).<p>\n" +
|
||||||
"Jang.: Jang-gya (Lcang skya rol pa\'i rdo rje). " +
|
"Jang.: Jang-gya (Lcang skya rol pa\'i rdo rje). " +
|
||||||
"Presentation of Tenets (Lcang skya grub mtha').<p>\n" +
|
"Presentation of Tenets (Lcang skya grub mtha').<p>\n" +
|
||||||
"JKA: ??? {PH: see mngon sum (definition) } <p>\n" +
|
"JKA: ??? {PH: see mngon sum (definition) } <p>\n" +
|
||||||
"KS: Khetsun Sangpo, Biographical Dictionary of Tibet and Tibetan Buddhism. " +
|
"KS: Khetsun Sangpo, Biographical Dictionary of Tibet and Tibetan Buddhism. " +
|
||||||
"(LTWA: Dharamsala, HP)<p>\n" +
|
"(LTWA: Dharamsala, HP)<p>\n" +
|
||||||
"L: Lamotte, Etienne. Samdhinirmocana-sutra " +
|
"L: Lamotte, Etienne. Samdhinirmocana-sutra " +
|
||||||
"(Louvain: Universite de Louvain, 1935).<p>\n" +
|
"(Louvain: Universite de Louvain, 1935).<p>\n" +
|
||||||
"LAK: Jam-bel-sam-pel (\'Jam dpal bsam phel). Presentation of Awareness and " +
|
"LAK: Jam-bel-sam-pel (\'Jam dpal bsam phel). Presentation of Awareness and " +
|
||||||
"Knowledge (Blo rig gi rnam bzhag).<p>\n" +
|
"Knowledge (Blo rig gi rnam bzhag).<p>\n" +
|
||||||
"Lati: Oral commentary by Lati Rinbochay.<p>\n" +
|
"Lati: Oral commentary by Lati Rinbochay.<p>\n" +
|
||||||
"LCh: Chandra, Lokesh. Tibetan-Sanskrit Dictionary (New Delhi, 1987).<p>\n" +
|
"LCh: Chandra, Lokesh. Tibetan-Sanskrit Dictionary (New Delhi, 1987).<p>\n" +
|
||||||
"LG: Losang Gyatso\'s Blo rig.<p>\n" +
|
"LG: Losang Gyatso\'s Blo rig.<p>\n" +
|
||||||
"LM: ??? {PH: see skye bu chung ngu ... }<p>\n" +
|
"LM: ??? {PH: see skye bu chung ngu ... }<p>\n" +
|
||||||
"LR: Hopkins, Jeffrey. Glossary for Gsung rab kun gyi snying po lam rim gyi " +
|
"LR: Hopkins, Jeffrey. Glossary for Gsung rab kun gyi snying po lam rim gyi " +
|
||||||
"gtso bo rnam pa gsung gi khrid yid gzhan phan snying po (by Panchen Lama IV).<p>\n" +
|
"gtso bo rnam pa gsung gi khrid yid gzhan phan snying po (by Panchen Lama IV).<p>\n" +
|
||||||
"LSR: Tsul-trim-nam-gyel (Tshul khrims rnam rgyal). Presentation of Signs " +
|
"LSR: Tsul-trim-nam-gyel (Tshul khrims rnam rgyal). Presentation of Signs " +
|
||||||
"and Reasonings (Rtags rigs kyi rnam bzhag).<p>\n" +
|
"and Reasonings (Rtags rigs kyi rnam bzhag).<p>\n" +
|
||||||
"LWT: Lo-sang-gon-chok (Blo bzang dkon mchog). Word Commentary on the Root " +
|
"LWT: Lo-sang-gon-chok (Blo bzang dkon mchog). Word Commentary on the Root " +
|
||||||
"Text of [Jam-yang-shay-ba\'s] \"Tenets\".<p>\n" +
|
"Text of [Jam-yang-shay-ba\'s] \"Tenets\".<p>\n" +
|
||||||
"ME: Hopkins, Jeffrey. Meditation on Emptiness (London, Wisdom, 1983).<p>\n" +
|
"ME: Hopkins, Jeffrey. Meditation on Emptiness (London, Wisdom, 1983).<p>\n" +
|
||||||
"MGP: ??? {PH: see bkag (examples) }<p>\n" +
|
"MGP: ??? {PH: see bkag (examples) }<p>\n" +
|
||||||
"MSA: Nagao, Gadjin. Index to the Mahayanasutralankara (Tokyo: Nippon " +
|
"MSA: Nagao, Gadjin. Index to the Mahayanasutralankara (Tokyo: Nippon " +
|
||||||
"Gakujutsu Shinkvo-kai, 1958).<p>\n" +
|
"Gakujutsu Shinkvo-kai, 1958).<p>\n" +
|
||||||
"MSI: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Middling Exposition of " +
|
"MSI: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Middling Exposition of " +
|
||||||
"Special Insight (Lhag mthong \'bring).<p>\n" +
|
"Special Insight (Lhag mthong \'bring).<p>\n" +
|
||||||
"MV: Nagao, Gadjin. Index to the Madhyanta-vibhaga (Tokyo: 1961).<p>\n" +
|
"MV: Nagao, Gadjin. Index to the Madhyanta-vibhaga (Tokyo: 1961).<p>\n" +
|
||||||
"N: Zuiryu NAKAMURA. Index to the Ratnagotravibhaga-mahayanottaratantra-sastra " +
|
"N: Zuiryu NAKAMURA. Index to the Ratnagotravibhaga-mahayanottaratantra-sastra " +
|
||||||
"(Tokyo, 1961).<p>\n" +
|
"(Tokyo, 1961).<p>\n" +
|
||||||
"P: Peking edition of the Tripitaka.<p>\n" +
|
"P: Peking edition of the Tripitaka.<p>\n" +
|
||||||
"PGP: Lo-sang-da-yang (Blo bzang rta dbyangs). Presentation of the Grounds " +
|
"PGP: Lo-sang-da-yang (Blo bzang rta dbyangs). Presentation of the Grounds " +
|
||||||
"and Paths in Prasangika (Thal \'gyur pa\'i sa lam).<p>\n" +
|
"and Paths in Prasangika (Thal \'gyur pa\'i sa lam).<p>\n" +
|
||||||
"PP: Candrakirti. Prasannapada.<p>\n" +
|
"PP: Candrakirti. Prasannapada.<p>\n" +
|
||||||
"S: Samdhinirmocana-sutra (Tok Palace version, 160 pp., Leh, Ladakh: Shesrig " +
|
"S: Samdhinirmocana-sutra (Tok Palace version, 160 pp., Leh, Ladakh: Shesrig " +
|
||||||
"Dpemzod, 1975-1980, vol. ja).<p>\n" +
|
"Dpemzod, 1975-1980, vol. ja).<p>\n" +
|
||||||
"TAK: Pur-bu-jok (Phur bu lcog). Explanation of the Presentation of Objects " +
|
"TAK: Pur-bu-jok (Phur bu lcog). Explanation of the Presentation of Objects " +
|
||||||
"and Object-Possessors as Well as Awareness and Knowledge (Yul dang yul can " +
|
"and Object-Possessors as Well as Awareness and Knowledge (Yul dang yul can " +
|
||||||
"dang blo rig).<p>\n" +
|
"dang blo rig).<p>\n" +
|
||||||
"TCT: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics (Yongs " +
|
"TCT: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics (Yongs " +
|
||||||
"\'dzin bsdus grwa).<p>\n" +
|
"\'dzin bsdus grwa).<p>\n" +
|
||||||
"TGP: Nga-wang-bel-den (Ngag dbang dpal ldan). Treatise Illuminating the " +
|
"TGP: Nga-wang-bel-den (Ngag dbang dpal ldan). Treatise Illuminating the " +
|
||||||
"Presentation of the Four Great Secret Tantra Sets (Sngags kyi sa lam).<p>\n" +
|
"Presentation of the Four Great Secret Tantra Sets (Sngags kyi sa lam).<p>\n" +
|
||||||
"TN: Vasubandhu. Trisvabhavanirdesha.<p>\n" +
|
"TN: Vasubandhu. Trisvabhavanirdesha.<p>\n" +
|
||||||
"VM: Bu-don-rin-chen-drup (bu ston rin chen grub), The Practice of " +
|
"VM: Bu-don-rin-chen-drup (bu ston rin chen grub), The Practice of " +
|
||||||
"(Jnandagarbha\'s) \"The Rite of the Vajra-Element Mandala: The Source of All " +
|
"(Jnandagarbha\'s) \"The Rite of the Vajra-Element Mandala: The Source of All " +
|
||||||
"Vajras\": A Precious Enhancer of Thought (rDo rje dbyings kyi dkyil \'khor gyi " +
|
"Vajras\": A Precious Enhancer of Thought (rDo rje dbyings kyi dkyil \'khor gyi " +
|
||||||
"cho ga rdo rje thams cad \'byung ba zhes bya ba\'i lag len rin chen bsam \'phel), " +
|
"cho ga rdo rje thams cad \'byung ba zhes bya ba\'i lag len rin chen bsam \'phel), " +
|
||||||
"in Collected Works, Part 12 na. Lhasa: Zhol Printing House, 1990.<p>\n" +
|
"in Collected Works, Part 12 na. Lhasa: Zhol Printing House, 1990.<p>\n" +
|
||||||
"Y: Susumi YAMAGUCHI.Index to the Prasannapada Madhyamakavrtti. " +
|
"Y: Susumi YAMAGUCHI.Index to the Prasannapada Madhyamakavrtti. " +
|
||||||
"(Kyoto: Heirakuji-Shoten, 1974).<p>\n" +
|
"(Kyoto: Heirakuji-Shoten, 1974).<p>\n" +
|
||||||
"YT: Oral commentary by Yeshi Thupten.";
|
"YT: Oral commentary by Yeshi Thupten.";
|
||||||
|
|
||||||
protected SimplifiedLinkedList wordList;
|
protected SimplifiedLinkedList wordList;
|
||||||
|
|
||||||
/**
 * Creates a scanner with a freshly allocated, empty token list.
 */
public TibetanScanner()
{
    this.wordList = new SimplifiedLinkedList();
}
|
||||||
|
|
||||||
public void clearTokens()
|
public void clearTokens()
|
||||||
|
@ -184,68 +184,65 @@ public abstract class TibetanScanner
|
||||||
|
|
||||||
public Word[] getWordArray()
|
public Word[] getWordArray()
|
||||||
{
|
{
|
||||||
return getWordArray(true);
|
return getWordArray(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Word[] getWordArray(boolean includeRepeated)
|
public Word[] getWordArray(boolean includeRepeated)
|
||||||
{
|
{
|
||||||
Token token;
|
Token token;
|
||||||
Word array[], word;
|
Word array[], word;
|
||||||
int n=0;
|
int n=0;
|
||||||
SimplifiedListIterator li = wordList.listIterator();
|
SimplifiedListIterator li = wordList.listIterator();
|
||||||
SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList();
|
SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList();
|
||||||
|
|
||||||
while(li.hasNext())
|
while(li.hasNext())
|
||||||
{
|
{
|
||||||
token = (Token) li.next();
|
token = (Token) li.next();
|
||||||
|
|
||||||
if (token instanceof Word)
|
if (token instanceof Word)
|
||||||
{
|
{
|
||||||
ll.addLast(token);
|
ll.addLast(token);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (includeRepeated)
|
if (includeRepeated)
|
||||||
{
|
{
|
||||||
n = ll.size();
|
n = ll.size();
|
||||||
if (n==0) return null;
|
if (n==0) return null;
|
||||||
|
|
||||||
array = new Word[n];
|
array = new Word[n];
|
||||||
li = ll.listIterator();
|
li = ll.listIterator();
|
||||||
|
|
||||||
n=0;
|
n=0;
|
||||||
while (li.hasNext())
|
while (li.hasNext())
|
||||||
{
|
{
|
||||||
array[n++] = (Word) li.next();
|
array[n++] = (Word) li.next();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ll2 = new SimplifiedLinkedList();
|
ll2 = new SimplifiedLinkedList();
|
||||||
li = ll.listIterator();
|
li = ll.listIterator();
|
||||||
|
|
||||||
while(li.hasNext())
|
while(li.hasNext())
|
||||||
{
|
{
|
||||||
word = (Word) li.next();
|
word = (Word) li.next();
|
||||||
if (!ll2.contains(word)) ll2.addLast(word);
|
if (!ll2.contains(word)) ll2.addLast(word);
|
||||||
}
|
}
|
||||||
|
|
||||||
n = ll2.size();
|
n = ll2.size();
|
||||||
|
|
||||||
if (n==0) return null;
|
if (n==0) return null;
|
||||||
|
|
||||||
array = new Word[n];
|
array = new Word[n];
|
||||||
li = ll2.listIterator();
|
li = ll2.listIterator();
|
||||||
|
|
||||||
while (li.hasNext())
|
while (li.hasNext())
|
||||||
{
|
{
|
||||||
array[--n] = (Word) li.next();
|
array[--n] = (Word) li.next();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return array;
|
||||||
|
|
||||||
|
|
||||||
return array;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Processes one line of input; the behaviour is defined by each concrete
 * subclass. NOTE(review): presumably the tokens produced end up in
 * {@code wordList} -- confirm against the implementations.
 *
 * @param linea the line of text to scan
 */
public abstract void scanLine(String linea);
|
public abstract void scanLine(String linea);
|
||||||
|
|
Loading…
Reference in a new issue