From 5a0e454a2eda63dd1b47091d698f9c5dc6f6e7a7 Mon Sep 17 00:00:00 2001 From: amontano Date: Tue, 3 Mar 2009 05:23:49 +0000 Subject: [PATCH] Fixed translation tool servlet issues: got rid of title, deleted white space, dealt with UTF8 better, etc. --- .../BasicTibetanTranscriptionConverter.java | 19 +- source/org/thdl/tib/scanner/Manipulate.java | 963 ++++++++--------- .../thdl/tib/scanner/OnLineScannerFilter.java | 972 +++++++++--------- .../thdl/tib/scanner/RemoteScannerFilter.java | 366 ++++--- .../org/thdl/tib/scanner/ScannerLogger.java | 177 ++-- .../org/thdl/tib/scanner/TibetanScanner.java | 511 +++++---- 6 files changed, 1574 insertions(+), 1434 deletions(-) diff --git a/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java b/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java index 8d018b9..bc56b2f 100644 --- a/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java +++ b/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java @@ -46,6 +46,7 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant private static final int WYLIE_TO_ACIP=2; private static final int UNICODE_TO_WYLIE=3; private static final int WYLIE_TO_UNICODE=4; + private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095}; /** Converts from the Acip transliteration scheme to EWTS.*/ public static String acipToWylie(String acip) @@ -252,7 +253,19 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant nuevaPalabra = Manipulate.fixWazur(nuevaPalabra); return nuevaPalabra;*/ } - + + private static int getTibetanUnicodeStart(String unicode, int pos) + { + for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)>=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos; + return -1; + } + + private static int getTibetanUnicodeEnd(String unicode, int pos) + { + for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)TIBETAN_UNICODE_RANGE[1]) return pos; + return pos; + } + /** Converts Tibetan Unicode to EWTS. */ public static String unicodeToWylie(String unicode) { @@ -261,9 +274,9 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant TibetanDocument tibDoc; StringBuffer errors; int posStart=0, posEnd; - while((posStart = Manipulate.getTibetanUnicodeStart(unicode, posStart))>=0) + while((posStart = getTibetanUnicodeStart(unicode, posStart))>=0) { - posEnd = Manipulate.getTibetanUnicodeEnd(unicode, posStart+1); + posEnd = getTibetanUnicodeEnd(unicode, posStart+1); startString = unicode.substring(0, posStart); tibetanString = unicode.substring(posStart, posEnd); endString = unicode.substring(posEnd); diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java index ec15fa3..1a88e32 100644 --- a/source/org/thdl/tib/scanner/Manipulate.java +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -1,463 +1,500 @@ -/* -The contents of this file are subject to the AMP Open Community License -Version 1.0 (the "License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License on the AMP web site -(http://www.tibet.iteso.mx/Guatemala/). - -Software distributed under the License is distributed on an "AS IS" basis, -WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific terms governing rights and limitations under the -License. - -The Initial Developer of this software is Andres Montano Pellegrini. Portions -created by Andres Montano Pellegrini are Copyright 2001 Andres Montano -Pellegrini. All Rights Reserved. - -Contributor(s): ______________________________________. -*/ -package org.thdl.tib.scanner; - -/** Miscelaneous static methods for the manipulation of Tibetan text. - - @author Andrés Montano Pellegrini -*/ - -public class Manipulate -{ - private static String endOfParagraphMarks = "/;|!:^@#$%="; - private static String bracketMarks = "<>(){}[]"; - private static String endOfSyllableMarks = " _\t"; - private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks; - private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095}; - - /* public static String[] parseFields (String s, char delimiter) - { - int pos; - String field; - SimplifiedLinkedList ll = new SimplifiedLinkedList(); - - while ((pos = s.indexOf(delimiter))>=0) - { - field = s.substring(0, pos).trim(); - ll.addLast(field); - s = s.substring(pos+1); - } - - ll.addLast(s.trim()); - return ll.toStringArray(); - }*/ - - public static int indexOfAnyChar(String str, String chars) - { - int i; - for (i=0; i=0) - return i; - } - - return -1; - } - - public static int indexOfExtendedEndOfSyllableMark(String word) - { - return indexOfAnyChar(word, allStopMarkers); - } - - public static int indexOfBracketMarks(String word) - { - return indexOfAnyChar(word, bracketMarks); - } - - public static boolean isPunctuationMark(int ch) - { - return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0; - } - - public static boolean isEndOfParagraphMark(int ch) - { - return endOfParagraphMarks.indexOf(ch)>=0; - } - - public static boolean isEndOfSyllableMark(int ch) - { - return endOfSyllableMarks.indexOf(ch)>=0; - } - - public static boolean isMeaningful(String s) - { - for (int i=0; i10) n = 10; - for (i=0; i=0xF00 && ch<=0xFFF; - } - - public static boolean isTibetanUnicodeLetter(char ch) - { - - return ch>=0xF40 && ch<=0xFBC || ch>=0xF00 && ch<=0xF03; - } - - public static boolean isTibetanUnicodeDigit(char ch) - { - - return ch>=0xF20 && ch<=0xF33; - } - - public static boolean guessIfUnicode(String line) - { - char ch; - int unicode=0, i, n; - n = line.length(); - if (n>10) n = 10; - for (i=0; i0) - { - switch (ch) - { - case 'r': case 'l': case 'w': i--; - break; - case 'y': - ch2 = sil.charAt(i-1); - switch (ch2) - { - case '.': return "y"; - case 'n': return "ny"; - default: i--; - } - } - } - if (i==0) return sil.substring(i,i+1); - ch = sil.charAt(i); - ch2 = sil.charAt(i-1); - - switch(ch) - { - case 'h': - switch (ch2) - { - case 'k': case 'c': case 't': case 'p': case 'z': - return sil.substring(i-1,i+1); - case 's': - if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh"; - else return "sh"; - default: return "h"; - } - case 's': - if (ch2=='t') return "ts"; - else return "s"; - case 'g': - if (ch2=='n') return "ng"; - else return "g"; - case 'z': - if (ch2=='d') return "dz"; - else return "z"; - } - return sil.substring(i,i+1); - } - - public static String deleteQuotes(String s) - { - int length = s.length(), pos; - if (length>2) - { - if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"')) - s = s.substring(1,length-1); - - do - { - pos = s.indexOf("\"\""); - if (pos<0) break; - s = Manipulate.deleteSubstring(s, pos, pos+1); - } while (true); - } - - return s; - } - - - - /** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries - - Takes the output of ConsoleScannerFilter - (in RY format), converts the Wylie to Acip - and displays the result in csv format. - arch-palabras es usado solo cuando deseamos las palabras cambiadas - a otro archivo. - - - public static void main (String[] args) throws Exception - { - String linea, palabra, definicion, nuevaPalabra; - int marker; - PrintWriter psPalabras = null; - - BufferedReader keyb = new BufferedReader(new InputStreamReader(System.in)); - - if (args.length==1) - psPalabras = new PrintWriter(new FileOutputStream(args[0])); - - while ((linea=keyb.readLine())!=null) - { - if (linea.trim().equals("")) continue; - marker = linea.indexOf('-'); - if (marker<0) // linea tiene error - { - palabra = linea; - definicion = ""; - } - else - { - palabra = linea.substring(0, marker).trim(); - definicion = linea.substring(marker+1).trim(); - } - - nuevaPalabra = wylieToAcip(palabra); - - if (psPalabras!=null) - psPalabras.println(nuevaPalabra); - else System.out.print(nuevaPalabra + '\t'); - if (definicion.equals("")) - System.out.println(palabra); - else - System.out.println(palabra + '\t' + definicion); - } - if (psPalabras!=null) psPalabras.flush(); - }*/ - - /** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */ - public static String NCR2UnicodeString(String str) - { - StringBuffer ostr = new StringBuffer(); - int i1=0; - int i2=0; - - while(i2=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos; - return -1; - } - - public static int getTibetanUnicodeEnd(String unicode, int pos) - { - for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)TIBETAN_UNICODE_RANGE[1]) return pos; - return pos; - } - -} +/* +The contents of this file are subject to the AMP Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the AMP web site +(http://www.tibet.iteso.mx/Guatemala/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is Andres Montano Pellegrini. Portions +created by Andres Montano Pellegrini are Copyright 2001 Andres Montano +Pellegrini. All Rights Reserved. + +Contributor(s): ______________________________________. + */ +package org.thdl.tib.scanner; + +/** Miscelaneous static methods for the manipulation of Tibetan text. + + @author Andrés Montano Pellegrini + */ + +public class Manipulate +{ + private static String endOfParagraphMarks = "/;|!:^@#$%=,"; + private static String bracketMarks = "<>(){}[]"; + private static String endOfSyllableMarks = " _\t"; + private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks; + private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095}; + private static String JSON_ESCAPABLES = "\"\\/"; + + /* public static String[] parseFields (String s, char delimiter) + { + int pos; + String field; + SimplifiedLinkedList ll = new SimplifiedLinkedList(); + + while ((pos = s.indexOf(delimiter))>=0) + { + field = s.substring(0, pos).trim(); + ll.addLast(field); + s = s.substring(pos+1); + } + + ll.addLast(s.trim()); + return ll.toStringArray(); + }*/ + + public static int indexOfAnyChar(String str, String chars) + { + int i; + for (i=0; i=0) + return i; + } + + return -1; + } + + public static int indexOfExtendedEndOfSyllableMark(String word) + { + return indexOfAnyChar(word, allStopMarkers); + } + + public static int indexOfBracketMarks(String word) + { + return indexOfAnyChar(word, bracketMarks); + } + + public static boolean isPunctuationMark(int ch) + { + return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0; + } + + public static boolean isEndOfParagraphMark(int ch) + { + return endOfParagraphMarks.indexOf(ch)>=0; + } + + public static boolean isEndOfSyllableMark(int ch) + { + return endOfSyllableMarks.indexOf(ch)>=0; + } + + public static boolean isMeaningful(String s) + { + for (int i=0; i10) n = 10; + for (i=0; i=0xF00 && ch<=0xFFF; + } + + public static boolean isTibetanUnicodeLetter(char ch) + { + + return ch>=0xF40 && ch<=0xFBC || ch>=0xF00 && ch<=0xF03; + } + + public static boolean isTibetanUnicodeDigit(char ch) + { + + return ch>=0xF20 && ch<=0xF33; + } + + + public static boolean guessIfUnicode(String line) + { + char ch; + int unicode=0, i, n; + n = line.length(); + if (n>10) n = 10; + for (i=0; i=sil.length()) return null; + } + if (i==0) return ""; + + i--; + if (i==-1) return ""; + + if (sil.charAt(i)=='-') i--; + if (i>0 && sil.charAt(i)=='w') i--; + ch = sil.charAt(i); + + // check to see if it is a subscript (y, r, l, w) + if (i>0) + { + switch (ch) + { + case 'r': case 'l': i--; + break; + case 'y': + ch2 = sil.charAt(i-1); + switch (ch2) + { + case '.': return "y"; + case 'n': return "ny"; + default: i--; + } + } + } + if (sil.charAt(i)=='+') i--; + if (i==0) return sil.substring(i,i+1); + ch = sil.charAt(i); + ch2 = sil.charAt(i-1); + + switch(ch) + { + case 'h': + switch (ch2) + { + case 'k': case 'c': case 't': case 'p': case 'z': + return sil.substring(i-1,i+1); + case '+': + return sil.substring(i-2, i-1); + case 's': + if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh"; + else return "sh"; + default: return "h"; + } + case 's': + if (ch2=='t') return "ts"; + else return "s"; + case 'g': + if (ch2=='n') return "ng"; + else return "g"; + case 'z': + if (ch2=='d') return "dz"; + else return "z"; + } + return sil.substring(i,i+1); + } + + public static String deleteQuotes(String s) + { + int length = s.length(), pos; + if (length>2) + { + if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"')) + s = s.substring(1,length-1); + + do + { + pos = s.indexOf("\"\""); + if (pos<0) break; + s = Manipulate.deleteSubstring(s, pos, pos+1); + } while (true); + } + + return s; + } + + + + /** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries + + Takes the output of ConsoleScannerFilter + (in RY format), converts the Wylie to Acip + and displays the result in csv format. + arch-palabras es usado solo cuando deseamos las palabras cambiadas + a otro archivo. + + + public static void main (String[] args) throws Exception + { + String linea, palabra, definicion, nuevaPalabra; + int marker; + PrintWriter psPalabras = null; + + BufferedReader keyb = new BufferedReader(new InputStreamReader(System.in)); + + if (args.length==1) + psPalabras = new PrintWriter(new FileOutputStream(args[0])); + + while ((linea=keyb.readLine())!=null) + { + if (linea.trim().equals("")) continue; + marker = linea.indexOf('-'); + if (marker<0) // linea tiene error + { + palabra = linea; + definicion = ""; + } + else + { + palabra = linea.substring(0, marker).trim(); + definicion = linea.substring(marker+1).trim(); + } + + nuevaPalabra = wylieToAcip(palabra); + + if (psPalabras!=null) + psPalabras.println(nuevaPalabra); + else System.out.print(nuevaPalabra + '\t'); + if (definicion.equals("")) + System.out.println(palabra); + else + System.out.println(palabra + '\t' + definicion); + } + if (psPalabras!=null) psPalabras.flush(); + }*/ + + /** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */ + public static String NCR2UnicodeString(String str) + { + StringBuffer ostr = new StringBuffer(); + int i1=0; + int i2=0; + + while(i2=0) + { + len = str.length(); + str = str.substring(0, i) + "\\" + str.substring(i, len); + i++; + } + } + str = replace(str, "\b", "\\b"); + str = replace(str, "\f", "\\f"); + str = replace(str, "\n", "\\n"); + str = replace(str, "\r", "\\r"); + str = replace(str, "\t", "\\t"); + return str; + } + + public static boolean containsLetters(String str) + { + int i=0; + if (str==null) return false; + while (i=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos; + return -1; + } + + public static int getTibetanUnicodeEnd(String unicode, int pos) + { + for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)TIBETAN_UNICODE_RANGE[1]) return pos; + return pos; + } +} diff --git a/source/org/thdl/tib/scanner/OnLineScannerFilter.java b/source/org/thdl/tib/scanner/OnLineScannerFilter.java index 8e61333..11740ad 100644 --- a/source/org/thdl/tib/scanner/OnLineScannerFilter.java +++ b/source/org/thdl/tib/scanner/OnLineScannerFilter.java @@ -1,465 +1,507 @@ -/* - The contents of this file are subject to the AMP Open Community License - Version 1.0 (the "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License on the AMP web site - (http://www.tibet.iteso.mx/Guatemala/). - - Software distributed under the License is distributed on an "AS IS" basis, - WITHOUT WARRANTY OF ANY KIND, either express or implied. See the - License for the specific terms governing rights and limitations under the - License. - - The Initial Developer of this software is Andres Montano Pellegrini. Portions - created by Andres Montano Pellegrini are Copyright 2001 Andres Montano - Pellegrini. All Rights Reserved. - - Contributor(s): ______________________________________. - */ - -package org.thdl.tib.scanner; - -import java.io.PrintWriter; -import java.util.MissingResourceException; -import java.util.ResourceBundle; - -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.thdl.util.ThdlOptions; - -/** Interfase to provide access to an on-line dictionary through a form in html; - Inputs Tibetan text (Roman script only) and displays the - words (Roman or Tibetan script) with their definitions. - Runs on the server and is called upon through an HTTP request directly - by the browser. Requires no additional software installed on the client. - - @author Andrés Montano Pellegrini - */ -public class OnLineScannerFilter extends HttpServlet -{ - private final static String propertyFile = "dictionary"; - private final static String dictNameProperty = "onlinescannerfilter.dict-file-name"; - private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff"; - private final static String moreLinksProperty = "onlinescannerfilter.links-to-more-stuff"; - private final static String clearStr = "Clear"; - private final static String buttonStr = "button"; - private final static String scriptStr = "script"; - private final static String tibetanStr = "tibetan"; - - ResourceBundle rb; - private TibetanScanner scanner; - private String dictionaries[]; - private ScannerLogger sl; - - public OnLineScannerFilter() //throws Exception - { - rb = ResourceBundle.getBundle(propertyFile); - sl = new ScannerLogger(); - - try - { - scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false); - } - catch (Exception e) - { - sl.writeLog("1\t1"); - sl.writeException(e); - } - - dictionaries = scanner.getDictionaryDescriptions(); - sl.writeLog("2\t1"); - } - - synchronized public void doGet(HttpServletRequest request, - HttpServletResponse response) //throws IOException, ServletException - { - String answer, parrafo = null, checkboxName; - - // if this line is included in the constructor, it works on the orion server but not on wyllie! - ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true); - ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true); - - response.setContentType("text/html"); - PrintWriter out; - sl.setUserIP(request.getRemoteAddr()); - - try - { - out = response.getWriter(); - } - catch (Exception e) - { - sl.writeLog("1\t1"); - sl.writeException(e); - return; - } - - BitDictionarySource ds=null; - boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null); - // int percent=100; - - out.println(""); - out.println(""); - out.println(""); - if (useTHDLBanner) - { - out.println(" Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool"); - out.println(" "); - out.println(" "); - out.println(" "); - } - else - out.println(" The Online Tibetan to English Translation/Dictionary Tool"); - - out.println(" "); - out.println(" "); - out.println(" "); - - answer = request.getParameter(scriptStr); - - /* script==null || makes default tibetan - script!=null && makes default roman - */ - wantsTibetan = (answer==null || answer.equals(tibetanStr)); - /*if (wantsTibetan) - { - out.println(""); - }*/ - out.println(""); - out.println(""); - - if (useTHDLBanner) - { - out.println(""); - out.println("
"); - out.println("
"); - out.println("
"); - out.println("

"); - out.println(" "); - out.println(" "); - out.println(" "); - out.println("

"); - out.println("
"); - out.println("
"); - out.println("
"); - out.println(" Home > Reference > Translation Tool"); - out.println("
"); - out.println("
"); - out.println("
"); - } - - out.println("

The Online Tibetan to English Translation/Dictionary Tool

"); - - try - { - out.println(rb.getString(otherLinksProperty)); - } - catch (MissingResourceException e) - { - // do nothing - } - - if (useTHDLBanner) - { - out.println("
"); - } - else - { - out.println(""); - } - out.println(""); - out.println(" "); - out.println(" "); - out.println(" "); - out.println(" "); - out.println("
"); - out.println("

Display results in:

"); - out.println("

Tibetan script (using Tibetan Machine Uni font)
"); - out.println(" Roman script

"); - - if (dictionaries!=null) - { - int i; - ds = scanner.getDictionarySource(); - ds.reset(); - checkedDicts = new boolean[dictionaries.length]; - /* out.println(" "); - out.println("Search in dictionaries:");*/ - out.println("

Search in dictionaries: "); - allUnchecked=true; - for (i=0; i"); - out.print("" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ")   "); - else - out.print(">" + DictionarySource.defTags[i] + "   "); -// out.println(" + ""); - } -// out.println(" "); - } - // fix for updates - else ds = BitDictionarySource.getAllDictionaries(); -// out.println(""); - out.println("

"); - out.println(""); - out.println(" "); - out.println(" "); - out.println(" "); - out.println(" "); - out.println("
"); - out.println("

Input text:

"); - out.println("
"); - out.println("

"); - out.println("
"); - - out.print(""); - out.println("
"); - try - { - out.println(rb.getString(moreLinksProperty)); - } - catch (MissingResourceException e) - { - // do nothing - } - - if (parrafo != null) - { - sl.writeLog("4\t1"); - if (ds!=null && !ds.isEmpty()) - desglosar(parrafo, out, wantsTibetan); - } - else sl.writeLog("3\t1"); - - out.println(TibetanScanner.copyrightHTML); - if (useTHDLBanner) out.println("
"); - out.println(""); - out.println(""); - } - - public void doPost(HttpServletRequest request, - HttpServletResponse response) - //throws IOException, ServletException - { - doGet(request, response); - } - - synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan) - { - //boolean hayMasLineas=true; - //int init = 0, fin; - //String linea; - Object words[]; - - if (!in.equals("")) - { - /* while (hayMasLineas) - { - fin = in.indexOf("\n",init); - if (fin<0) - { - linea = in.substring(init).trim(); - hayMasLineas=false; - } - else - linea = in.substring(init, fin).trim(); - - scanner.scanBody(linea); - - init = fin+1; - } */ - scanner.clearTokens(); - in = Manipulate.NCR2UnicodeString(in); - if (Manipulate.guessIfUnicode(in)) in = BasicTibetanTranscriptionConverter.unicodeToWylie(in); - else if (Manipulate.guessIfAcip(in)) in = BasicTibetanTranscriptionConverter.acipToWylie(in); - scanner.scanBody(in); - scanner.finishUp(); - printText(pw, tibetan); - printAllDefs(pw, tibetan); - scanner.clearTokens(); - } - } - - public void printText(PrintWriter pw, boolean tibetan) - { - Token words[] = scanner.getTokenArray(); - SwingWord word; - char pm; - int i; - - if (words==null) return; - - pw.print("

"); - for (i=0; i < words.length; i++) - { - - if (words[i] instanceof Word) - { - word = new SwingWord((Word)words[i]); - // if (word.getDefs().getDictionarySource()!=null) - pw.print(word.getLink(tibetan)); - // else pw.print(word.getWylie() + " "); - } - else - { - if (words[i] instanceof PunctuationMark) - { - pm = words[i].toString().charAt(0); - switch (pm) - { - case '\n': - pw.println("

"); - pw.print("

"); - break; - case '<': - pw.print("< "); - break; - case '>': - pw.print("> "); - break; - default: - pw.print(pm + " "); - } - } - } - } - pw.println("

"); - } - - public void printAllDefs(PrintWriter pw, boolean tibetan) { - int i, j, k=0; - Word words[]; - SwingWord word = null; - Definitions defs; - String tag; - DictionarySource ds; - ByteDictionarySource sourceb=null; - - words = scanner.getWordArray(false); - - if (words == null) - return; - pw.println(""); - - for (j = 0; j < words.length; j++) { - try { - - word = new SwingWord(words[j]); - defs = word.getDefs(); - ds = defs.getDictionarySource(); - pw.println(" "); - if (ds == null) { - tag = " "; - } - else { - if (FileSyllableListTree.versionNumber==2) { - tag = ds.getTag(0); - } - else { - sourceb = (ByteDictionarySource) ds; - k=0; - while (sourceb.isEmpty(k)) k++; - tag = sourceb.getTag(k); - k++; - } - } - - pw.print(" "); - pw.println(" "); - pw.println(" "); - - pw.println(" "); - for (i = 1; i < defs.def.length; i++) { - pw.println(" "); - - if (FileSyllableListTree.versionNumber==2) { - tag = ds.getTag(i); - } - else { - while (sourceb.isEmpty(k)) k++; - tag = sourceb.getTag(k); - k++; - } - - pw.println(" "); - pw.println(" "); - //else pw.println(" "); - pw.println(" "); - } - } catch (Exception e) { - sl.writeLog("1\t1\t" + word.getWylie()); - sl.writeException(e); - } - - } - pw.println("
" + word.getBookmark(tibetan) + "" + tag + "" + defs.def[0] + "
" + tag + "" + defs.def[i] + "" + defs.def[i] + "
"); - } - - public void destroy() - { - super.destroy(); - sl.setUserIP(null); - sl.writeLog("5\t1"); - scanner.destroy(); - } - -} +/* + The contents of this file are subject to the AMP Open Community License + Version 1.0 (the "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License on the AMP web site + (http://www.tibet.iteso.mx/Guatemala/). + + Software distributed under the License is distributed on an "AS IS" basis, + WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific terms governing rights and limitations under the + License. + + The Initial Developer of this software is Andres Montano Pellegrini. Portions + created by Andres Montano Pellegrini are Copyright 2001 Andres Montano + Pellegrini. All Rights Reserved. + + Contributor(s): ______________________________________. + */ + +package org.thdl.tib.scanner; + +import java.io.PrintWriter; +import java.util.MissingResourceException; +import java.util.ResourceBundle; + +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.thdl.util.ThdlOptions; + +/** Interfase to provide access to an on-line dictionary through a form in html; + Inputs Tibetan text (Roman script only) and displays the + words (Roman or Tibetan script) with their definitions. + Runs on the server and is called upon through an HTTP request directly + by the browser. Requires no additional software installed on the client. + + @author Andrés Montano Pellegrini + */ +public class OnLineScannerFilter extends HttpServlet +{ + private final static String propertyFile = "dictionary"; + private final static String dictNameProperty = "onlinescannerfilter.dict-file-name"; + private final static String otherLinksProperty = "onlinescannerfilter.links-to-other-stuff"; + private final static String moreLinksProperty = "onlinescannerfilter.links-to-more-stuff"; + private final static String smallerLinksProperty = "onlinescannerfilter.links-to-smaller-stuff"; + private final static String clearStr = "Clear"; + private final static String buttonStr = "button"; + private final static String scriptStr = "script"; + private final static String tibetanStr = "tibetan"; + + ResourceBundle rb; + private TibetanScanner scanner; + private String dictionaries[]; + private ScannerLogger sl; + + public OnLineScannerFilter() //throws Exception + { + System.setProperty("java.awt.headless","true"); + rb = ResourceBundle.getBundle(propertyFile); + sl = new ScannerLogger(); + + try + { + scanner = new LocalTibetanScanner(rb.getString(dictNameProperty), false); + } + catch (Exception e) + { + sl.writeLog("1\t1"); + sl.writeException(e); + } + + dictionaries = scanner.getDictionaryDescriptions(); + sl.writeLog("2\t1"); + } + + synchronized public void doGet(HttpServletRequest request, + HttpServletResponse response) //throws IOException, ServletException + { + String answer, parrafo = null, checkboxName; + try + { + request.setCharacterEncoding("UTF8"); + } + catch(Exception e) + { + // do nothing + } + // if this line is included in the constructor, it works on the orion server but not on wyllie! + ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true); + ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true); + + response.setContentType("text/html"); + PrintWriter out; + sl.setUserIP(request.getRemoteAddr()); + + try + { + out = response.getWriter(); + } + catch (Exception e) + { + sl.writeLog("1\t1"); + sl.writeException(e); + return; + } + + BitDictionarySource ds=null; + boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null); + // int percent=100; + + out.println(""); + out.println(""); + out.println(""); + out.println(" "); + if (useTHDLBanner) + { + out.println(" Tibetan and Himalayan Digital Library - The Online Tibetan to English Dictionary and Translation Tool"); + out.println(" "); + out.println(" "); + } + else + { + out.println(" The Online Tibetan to English Dictionary and Translation Tool"); + out.println(" "); + } + + out.println(" "); + out.println(" "); + out.println(" "); + + answer = request.getParameter(scriptStr); + + /* script==null || makes default tibetan + script!=null && makes default roman + */ + wantsTibetan = (answer==null || answer.equals(tibetanStr)); + /*if (wantsTibetan) + { + out.println(""); + }*/ + out.println(""); + out.println(""); + + if (useTHDLBanner) + { + out.println(""); + out.println("
"); + out.println("
"); + out.println("
"); + out.println("

"); + out.println(" "); + out.println(" "); + out.println(" "); + out.println("

"); + out.println("
"); + out.println("
"); + out.println("
"); + out.println(" Home > Reference > Translation Tool"); + out.println("
"); + out.println("
"); + out.println("
"); + } + try + { + out.println(rb.getString(otherLinksProperty)); + } + catch (MissingResourceException e) + { + // do nothing + } + + if (useTHDLBanner) + { + out.println("
"); + } + else + { + out.println(""); + } + out.println(""); + out.println(" "); + out.println(" "); + out.println(" "); + out.println(" "); + out.println(" "); + out.println(" "); + if (dictionaries!=null) + { + int i; + ds = scanner.getDictionarySource(); + ds.reset(); + checkedDicts = new boolean[dictionaries.length]; + /* out.println(" "); + out.println("");*/ + out.println(""); + } + out.println(" "); + } + // fix for updates + else ds = BitDictionarySource.getAllDictionaries(); +// out.println("
Display results in:"); + out.println(" Tibetan script (Tibetan Machine Uni font)"); + out.println(" Roman script"); + out.println("Help & Offline Installation
Search in dictionaries:
Search in dictionaries: "); + allUnchecked=true; + for (i=0; i"); + out.print("" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ")   "); + else + out.print(">" + DictionarySource.defTags[i] + "   "); +// out.println(" + "
"); +// out.println("

"); +// out.println(""); + out.println(" "); + out.println(" "); + out.println(" "); + out.println("
Input text:  "); + out.println("
"); + answer = request.getParameter(buttonStr); + String smallerLinks=null; + if (answer == null || answer != null && !answer.equals(clearStr)) + { + parrafo = request.getParameter("parrafo"); + } + if (parrafo==null) + { + try + { + smallerLinks = rb.getString(smallerLinksProperty); + } + catch (MissingResourceException e) + { + // do nothing + } + + } + if (smallerLinks!=null) + { + out.println(""); + out.println(""); + out.println(""); + out.println(""); + out.println(""); + out.println("
"); + } + + out.print(""); + if (smallerLinks!=null) + { + out.println(""); + out.println(smallerLinks); + out.println("
"); + } + + out.println("
"); + + if (parrafo != null) + { + sl.writeLog("4\t1"); + if (ds!=null && !ds.isEmpty()) + { + desglosar(parrafo, out, wantsTibetan); + } + } + else sl.writeLog("3\t1"); + + out.println(TibetanScanner.copyrightHTML); + if (useTHDLBanner) out.println("
"); + out.println(""); + out.println(""); + } + + public void doPost(HttpServletRequest request, + HttpServletResponse response) + //throws IOException, ServletException + { + doGet(request, response); + } + + synchronized public void desglosar(String in, PrintWriter pw, boolean tibetan) + { + //boolean hayMasLineas=true; + //int init = 0, fin; + String tmp; + Object words[]; + + if (!in.equals("")) + { + /* while (hayMasLineas) + { + fin = in.indexOf("\n",init); + if (fin<0) + { + linea = in.substring(init).trim(); + hayMasLineas=false; + } + else + linea = in.substring(init, fin).trim(); + + scanner.scanBody(linea); + + init = fin+1; + } */ + scanner.clearTokens(); + in = Manipulate.NCR2UnicodeString(in); + if (Manipulate.guessIfUnicode(in)) in = BasicTibetanTranscriptionConverter.unicodeToWylie(in); + else if (Manipulate.guessIfAcip(in)) in = BasicTibetanTranscriptionConverter.acipToWylie(in); + scanner.scanBody(in); + scanner.finishUp(); + printText(pw, tibetan); + try + { + tmp = rb.getString(moreLinksProperty); + pw.println("

"); + pw.println(tmp); + pw.println("

"); + } + catch (MissingResourceException e) + { + // do nothing + } + printAllDefs(pw, tibetan); + scanner.clearTokens(); + } + } + + public void printText(PrintWriter pw, boolean tibetan) + { + Token words[] = scanner.getTokenArray(); + SwingWord word; + char pm; + int i; + + if (words==null) return; + + pw.print("

"); + for (i=0; i < words.length; i++) + { + + if (words[i] instanceof Word) + { + word = new SwingWord((Word)words[i]); + // if (word.getDefs().getDictionarySource()!=null) + pw.print(word.getLink(tibetan)); + // else pw.print(word.getWylie() + " "); + } + else + { + if (words[i] instanceof PunctuationMark) + { + pm = words[i].toString().charAt(0); + switch (pm) + { + case '\n': + pw.println("

"); + pw.print("

"); + break; + case '<': + pw.print("< "); + break; + case '>': + pw.print("> "); + break; + default: + pw.print(pm + " "); + } + } + } + } + pw.println("

"); + } + + public void printAllDefs(PrintWriter pw, boolean tibetan) { + int i, j, k=0; + Word words[]; + SwingWord word = null; + Definitions defs; + String tag; + DictionarySource ds; + ByteDictionarySource sourceb=null; + + words = scanner.getWordArray(false); + + if (words == null) return; + pw.println(""); + + for (j = 0; j < words.length; j++) { + try { + + word = new SwingWord(words[j]); + defs = word.getDefs(); + ds = defs.getDictionarySource(); + pw.println(" "); + if (ds == null) { + tag = " "; + } + else { + if (FileSyllableListTree.versionNumber==2) { + tag = ds.getTag(0); + } + else { + sourceb = (ByteDictionarySource) ds; + k=0; + while (sourceb.isEmpty(k)) k++; + tag = sourceb.getTag(k); + k++; + } + } + + pw.print(" "); + pw.println(" "); + pw.println(" "); + + pw.println(" "); + for (i = 1; i < defs.def.length; i++) { + pw.println(" "); + + if (FileSyllableListTree.versionNumber==2) { + tag = ds.getTag(i); + } + else { + while (sourceb.isEmpty(k)) k++; + tag = sourceb.getTag(k); + k++; + } + + pw.println(" "); + pw.println(" "); + //else pw.println(" "); + pw.println(" "); + } + } catch (Exception e) { + sl.writeLog("1\t1\t" + word.getWylie()); + sl.writeException(e); + } + + } + pw.println("
" + word.getBookmark(tibetan) + "" + tag + "" + defs.def[0] + "
" + tag + "" + defs.def[i] + "" + defs.def[i] + "
"); + } + + public void destroy() + { + super.destroy(); + sl.setUserIP(null); + sl.writeLog("5\t1"); + scanner.destroy(); + } + +} diff --git a/source/org/thdl/tib/scanner/RemoteScannerFilter.java b/source/org/thdl/tib/scanner/RemoteScannerFilter.java index 79aeb63..78efceb 100644 --- a/source/org/thdl/tib/scanner/RemoteScannerFilter.java +++ b/source/org/thdl/tib/scanner/RemoteScannerFilter.java @@ -1,162 +1,206 @@ -/* -The contents of this file are subject to the AMP Open Community License -Version 1.0 (the "License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License on the AMP web site -(http://www.tibet.iteso.mx/Guatemala/). - -Software distributed under the License is distributed on an "AS IS" basis, -WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific terms governing rights and limitations under the -License. - -The Initial Developer of this software is Andres Montano Pellegrini. Portions -created by Andres Montano Pellegrini are Copyright 2001 Andres Montano -Pellegrini. All Rights Reserved. - -Contributor(s): ______________________________________. -*/ -package org.thdl.tib.scanner; - -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.util.ResourceBundle; - -import javax.servlet.GenericServlet; -import javax.servlet.ServletRequest; -import javax.servlet.ServletResponse; - -/** Running on the server, receives the tibetan text from applet/applications running on - the client and sends them the words with their definitions through the Internet. - Requests are made through {@link RemoteTibetanScanner}. - - @author Andrés Montano Pellegrini - @see RemoteTibetanScanner -*/ -public class RemoteScannerFilter extends GenericServlet -{ - private TibetanScanner scanner; - private BitDictionarySource ds; - private ScannerLogger sl; - - public RemoteScannerFilter() - { - ResourceBundle rb = ResourceBundle.getBundle("dictionary"); - sl = new ScannerLogger(); - - try - { - scanner = new LocalTibetanScanner(rb.getString("onlinescannerfilter.dict-file-name"),false); - } - catch (Exception e) - { - sl.writeLog("1\t2"); - sl.writeException(e); - } - ds = scanner.getDictionarySource(); - sl.writeLog("Creation\t2"); - } - - public void service(ServletRequest req, ServletResponse res) //throws ServletException, IOException - { - BufferedReader br; - res.setContentType ("text/plain"); - sl.setUserIP(req.getRemoteAddr()); - - Word word = null, words[] = null; - PrintWriter out; - - try - { - out = res.getWriter(); - } - catch (Exception e) - { - sl.writeLog("1\t2"); - sl.writeException(e); - return; - } - - int i; - String linea, dicts = req.getParameter("dicts"), dicDescrip[]; - - if (dicts!=null) - { - if (dicts.equals("names")) - { - sl.writeLog("3\t2"); - dicDescrip = scanner.getDictionaryDescriptions(); - if (dicDescrip==null) - { - out.close(); - return; - } - - for (i=0; iAndrés Montano Pellegrini.
All rights reserved."; - - public static final int NORMAL_MODE=1; - public static final int DEBUG_MODE=2; - public static int mode; - - static - { - mode = NORMAL_MODE; - } - - public static final String aboutTomeraider= - "Welcome to Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary version 2.0.0!

\n" + - "This file was automatically generated using software developed by Andres Montano Pellegrini. " + - "For more information, see http://www.people.virginia.edu/~am2zb/tibetan .

" + - "Formulator and Editor: Jeffrey Hopkins
\n" + - "Contributors: Joe Wilson, Craig Preston, John Powers, Nathanial Garson, " + - "Paul Hackett, Andres Montano

" + - "A project of the Tibetan Studies Institute, Boonesville, Virginia, and the " + - "University of Virginia Tibetan Studies Program

" + - "\u00A9 Jeffrey Hopkins 1992.

" + - "Apology

" + - "This is a work in progress in crude form that is being shared with students " + - "of the Tibetan language mainly in order to receive input for further " + - "development. The English translations of the entries can be said only to " + - "represent what contributors, over a span of over thirty years, thought were " + - "my current translations. A small number are simply wrong; others need to be " + - "updated; and all will receive much more attention and, hence, detail.

\n" + - "The Dictionary has been entered into a database with fields for the entry, " + - "Sanskrit, tenses, my English, a few others’ interests, examples, " + - "definition, divisions, and comments. At this point, very few entries " + - "contain all of these items, but the plan is provide these, where " + - "appropriate, over the years. Translations for entries that have arisen from " + - "my work and from interactions with my students are in boldface, whereas " + - "those from other works are in regular type on separate lines and are marked " + - "with an initial at the end of the line. A key to these markings is given on " + - "the next page.

\n" + - "(Please note that the radical signs for Sanskrit roots are, after the first" + - "letter of the alphabet, in a state of disarray.)

\n" + - "I hope that you will bear with the many inadequacies of this first release.

\n" + - "Paul Jeffrey Hopkins
\n" + - "Professor of Tibetan Studies

\n" + - "Abbreviations

\n" + - "B-7: ??? {PH: see dngos med ... & dngos po (synonyms) }

\n" + - "BJ: Bel-jor-hlun-drup (Dpal \'byor lhun grub). Legs bshad snying po\'i dka' " + - "\'grel bstan pa\'i sgron me (Buxaduar: Sera Monastery, 1968).

\n" + - "BK: ??? {PH: see bka\' (examples) }

\n" + - "BR: Losang Gyatso (Blo bzang rgya mtsho). Presentation of Knowledge and " + - "Awareness (Blo rig).

\n" + - "BWT: Ngak-wang-bel-den (Ngag dbang dpal ldan). Annotations for " + - "[Jam-yang-shay-ba\'s] \"Tenets\" (Grub mtha\' chen mo\'i mchan).

\n" + - "C: Conze, Edward. Materials for a Dictionary of the Prajnaparamita " + - "Literature (Tokyo: Suzuki Research Foundation, 1967).

\n" + - "col.: colloquial

\n" + - "D1: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " + - "(Part 1: Bsdus grwa chung ngu).

\n" + - "D2: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " + - "(Part 2: Bsdus grwa \'bring).

\n" + - "DASI: Decisive Analysis of Special Insight.

\n" + - "DG: Germano, David. Poetic Thought, the Intelligent Universe, and the " + - "Mystery of Self: the Tantric Synthesis of rDzogs Chen in Fourteenth Century " + - "Tibet. (Ph.d. dissertation, University of Wisconsin, Madison,WI 1992).

\n" + - "DK: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Drang ba dang nges pa\'i " + - "don rnam par phye ba'i bstan bcos legs bshad snying po (Sarnath: Pleasure of " + - "Elegant Sayings Press, 1979).

\n" + - "Ganden Triba: Oral commentary of Ganden Triba Jam-bel-shen-pen.

\n" + - "GCT: Ngak-wang-dra-shi (Ngag dbang bkra shis). Collected Topics by a " + - "Spiritual Son of Jam-yang-shay-ba (Sgo mang sras bsdus grwa).

\n" + - "GD: Dreyfus, George. Ontology, Philosophy of Language, and Epistemology in " + - "Buddhist Tradition (Ph.d. dissertation. Religious Studies, University of " + - "Virginia, Charlottesville,VA 1991).

\n" + - "Gon-chok: Gon-chok-jik-may-wang-bo (Dkon mchog \'jigs med dbang po). " + - "Precious Garland of Tenets (Grub mtha\' rin chen phreng ba).

\n" + - "Jang.: Jang-gya (Lcang skya rol pa\'i rdo rje). " + - "Presentation of Tenets (Lcang skya grub mtha').

\n" + - "JKA: ??? {PH: see mngon sum (definition) }

\n" + - "KS: Khetsun Sangpo, Biographical Dictionary of Tibet and Tibetan Buddhism. " + - "(LTWA: Dharamsala, HP)

\n" + - "L: Lamotte, Etienne. Samdhinirmocana-sutra " + - "(Louvain: Universite de Louvain, 1935).

\n" + - "LAK: Jam-bel-sam-pel (\'Jam dpal bsam phel). Presentation of Awareness and " + - "Knowledge (Blo rig gi rnam bzhag).

\n" + - "Lati: Oral commentary by Lati Rinbochay.

\n" + - "LCh: Chandra, Lokesh. Tibetan-Sanskrit Dictionary (New Delhi, 1987).

\n" + - "LG: Losang Gyatso\'s Blo rig.

\n" + - "LM: ??? {PH: see skye bu chung ngu ... }

\n" + - "LR: Hopkins, Jeffrey. Glossary for Gsung rab kun gyi snying po lam rim gyi " + - "gtso bo rnam pa gsung gi khrid yid gzhan phan snying po (by Panchen Lama IV).

\n" + - "LSR: Tsul-trim-nam-gyel (Tshul khrims rnam rgyal). Presentation of Signs " + - "and Reasonings (Rtags rigs kyi rnam bzhag).

\n" + - "LWT: Lo-sang-gon-chok (Blo bzang dkon mchog). Word Commentary on the Root " + - "Text of [Jam-yang-shay-ba\'s] \"Tenets\".

\n" + - "ME: Hopkins, Jeffrey. Meditation on Emptiness (London, Wisdom, 1983).

\n" + - "MGP: ??? {PH: see bkag (examples) }

\n" + - "MSA: Nagao, Gadjin. Index to the Mahayanasutralankara (Tokyo: Nippon " + - "Gakujutsu Shinkvo-kai, 1958).

\n" + - "MSI: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Middling Exposition of " + - "Special Insight (Lhag mthong \'bring).

\n" + - "MV: Nagao, Gadjin. Index to the Madhyanta-vibhaga (Tokyo: 1961).

\n" + - "N: Zuiryu NAKAMURA. Index to the Ratnagotravibhaga-mahayanottaratantra-sastra " + - "(Tokyo, 1961).

\n" + - "P: Peking edition of the Tripitaka.

\n" + - "PGP: Lo-sang-da-yang (Blo bzang rta dbyangs). Presentation of the Grounds " + - "and Paths in Prasangika (Thal \'gyur pa\'i sa lam).

\n" + - "PP: Candrakirti. Prasannapada.

\n" + - "S: Samdhinirmocana-sutra (Tok Palace version, 160 pp., Leh, Ladakh: Shesrig " + - "Dpemzod, 1975-1980, vol. ja).

\n" + - "TAK: Pur-bu-jok (Phur bu lcog). Explanation of the Presentation of Objects " + - "and Object-Possessors as Well as Awareness and Knowledge (Yul dang yul can " + - "dang blo rig).

\n" + - "TCT: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics (Yongs " + - "\'dzin bsdus grwa).

\n" + - "TGP: Nga-wang-bel-den (Ngag dbang dpal ldan). Treatise Illuminating the " + - "Presentation of the Four Great Secret Tantra Sets (Sngags kyi sa lam).

\n" + - "TN: Vasubandhu. Trisvabhavanirdesha.

\n" + - "VM: Bu-don-rin-chen-drup (bu ston rin chen grub), The Practice of " + - "(Jnandagarbha\'s) \"The Rite of the Vajra-Element Mandala: The Source of All " + - "Vajras\": A Precious Enhancer of Thought (rDo rje dbyings kyi dkyil \'khor gyi " + - "cho ga rdo rje thams cad \'byung ba zhes bya ba\'i lag len rin chen bsam \'phel), " + - "in Collected Works, Part 12 na. Lhasa: Zhol Printing House, 1990.

\n" + - "Y: Susumi YAMAGUCHI.Index to the Prasannapada Madhyamakavrtti. " + - "(Kyoto: Heirakuji-Shoten, 1974).

\n" + - "YT: Oral commentary by Yeshi Thupten."; - - protected SimplifiedLinkedList wordList; - - public TibetanScanner() - { - wordList = new SimplifiedLinkedList(); - } - - public void clearTokens() - { - wordList = new SimplifiedLinkedList(); - } - - public Token[] getTokenArray() - { - int n=wordList.size(); - if (n==0) return null; - Token token[] = new Token[n]; - SimplifiedListIterator li = wordList.listIterator(); - while(li.hasNext()) - token[--n] = (Token)li.next(); - return token; - } - - public SimplifiedLinkedList getTokenLinkedList() - { - return wordList; - } - - public Word[] getWordArray() - { - return getWordArray(true); - } - - public Word[] getWordArray(boolean includeRepeated) - { - Token token; - Word array[], word; - int n=0; - SimplifiedListIterator li = wordList.listIterator(); - SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList(); - - while(li.hasNext()) - { - token = (Token) li.next(); - - if (token instanceof Word) - { - ll.addLast(token); - } - } - - if (includeRepeated) - { - n = ll.size(); - if (n==0) return null; - - array = new Word[n]; - li = ll.listIterator(); - - n=0; - while (li.hasNext()) - { - array[n++] = (Word) li.next(); - } - } - else - { - ll2 = new SimplifiedLinkedList(); - li = ll.listIterator(); - - while(li.hasNext()) - { - word = (Word) li.next(); - if (!ll2.contains(word)) ll2.addLast(word); - } - - n = ll2.size(); - - if (n==0) return null; - - array = new Word[n]; - li = ll2.listIterator(); - - while (li.hasNext()) - { - array[--n] = (Word) li.next(); - } - } - - - - return array; - } - - public abstract void scanLine(String linea); - public abstract void scanBody(String linea); - public abstract void finishUp(); - public abstract BitDictionarySource getDictionarySource(); - public abstract String[] getDictionaryDescriptions(); - public abstract void destroy(); -} +/* +The contents of this file are subject to the AMP Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the AMP web site +(http://www.tibet.iteso.mx/Guatemala/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is Andres Montano Pellegrini. Portions +created by Andres Montano Pellegrini are Copyright 2001 Andres Montano +Pellegrini. All Rights Reserved. + +Contributor(s): ______________________________________. + */ + +package org.thdl.tib.scanner; +import org.thdl.util.SimplifiedLinkedList; +import org.thdl.util.SimplifiedListIterator; +import org.thdl.util.ThdlVersion; + +/** Defines the core methods required to provide access to a dictionary; local or remote. + + @author Andrés Montano Pellegrini + */ +public abstract class TibetanScanner +{ + public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; + public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2009 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; + public static final String copyrightASCII="Copyright 2000-2009 by Andres Montano Pellegrini, all rights reserved."; + public static final String copyrightHTML="


" + version + "Copyright © 2000-2009 by Andrés Montano Pellegrini. All rights reserved."; + + public static final int NORMAL_MODE=1; + public static final int DEBUG_MODE=2; + public static int mode; + + static + { + mode = NORMAL_MODE; + } + + public static final String aboutTomeraider= + "Welcome to Jeffrey Hopkins' Tibetan-Sanskrit-English Dictionary version 2.0.0!

\n" + + "This file was automatically generated using software developed by Andres Montano Pellegrini. " + + "For more information, see http://www.people.virginia.edu/~am2zb/tibetan .

" + + "Formulator and Editor: Jeffrey Hopkins
\n" + + "Contributors: Joe Wilson, Craig Preston, John Powers, Nathanial Garson, " + + "Paul Hackett, Andres Montano

" + + "A project of the Tibetan Studies Institute, Boonesville, Virginia, and the " + + "University of Virginia Tibetan Studies Program

" + + "\u00A9 Jeffrey Hopkins 1992.

" + + "Apology

" + + "This is a work in progress in crude form that is being shared with students " + + "of the Tibetan language mainly in order to receive input for further " + + "development. The English translations of the entries can be said only to " + + "represent what contributors, over a span of over thirty years, thought were " + + "my current translations. A small number are simply wrong; others need to be " + + "updated; and all will receive much more attention and, hence, detail.

\n" + + "The Dictionary has been entered into a database with fields for the entry, " + + "Sanskrit, tenses, my English, a few others’ interests, examples, " + + "definition, divisions, and comments. At this point, very few entries " + + "contain all of these items, but the plan is provide these, where " + + "appropriate, over the years. Translations for entries that have arisen from " + + "my work and from interactions with my students are in boldface, whereas " + + "those from other works are in regular type on separate lines and are marked " + + "with an initial at the end of the line. A key to these markings is given on " + + "the next page.

\n" + + "(Please note that the radical signs for Sanskrit roots are, after the first" + + "letter of the alphabet, in a state of disarray.)

\n" + + "I hope that you will bear with the many inadequacies of this first release.

\n" + + "Paul Jeffrey Hopkins
\n" + + "Professor of Tibetan Studies

\n" + + "Abbreviations

\n" + + "B-7: ??? {PH: see dngos med ... & dngos po (synonyms) }

\n" + + "BJ: Bel-jor-hlun-drup (Dpal \'byor lhun grub). Legs bshad snying po\'i dka' " + + "\'grel bstan pa\'i sgron me (Buxaduar: Sera Monastery, 1968).

\n" + + "BK: ??? {PH: see bka\' (examples) }

\n" + + "BR: Losang Gyatso (Blo bzang rgya mtsho). Presentation of Knowledge and " + + "Awareness (Blo rig).

\n" + + "BWT: Ngak-wang-bel-den (Ngag dbang dpal ldan). Annotations for " + + "[Jam-yang-shay-ba\'s] \"Tenets\" (Grub mtha\' chen mo\'i mchan).

\n" + + "C: Conze, Edward. Materials for a Dictionary of the Prajnaparamita " + + "Literature (Tokyo: Suzuki Research Foundation, 1967).

\n" + + "col.: colloquial

\n" + + "D1: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " + + "(Part 1: Bsdus grwa chung ngu).

\n" + + "D2: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics " + + "(Part 2: Bsdus grwa \'bring).

\n" + + "DASI: Decisive Analysis of Special Insight.

\n" + + "DG: Germano, David. Poetic Thought, the Intelligent Universe, and the " + + "Mystery of Self: the Tantric Synthesis of rDzogs Chen in Fourteenth Century " + + "Tibet. (Ph.d. dissertation, University of Wisconsin, Madison,WI 1992).

\n" + + "DK: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Drang ba dang nges pa\'i " + + "don rnam par phye ba'i bstan bcos legs bshad snying po (Sarnath: Pleasure of " + + "Elegant Sayings Press, 1979).

\n" + + "Ganden Triba: Oral commentary of Ganden Triba Jam-bel-shen-pen.

\n" + + "GCT: Ngak-wang-dra-shi (Ngag dbang bkra shis). Collected Topics by a " + + "Spiritual Son of Jam-yang-shay-ba (Sgo mang sras bsdus grwa).

\n" + + "GD: Dreyfus, George. Ontology, Philosophy of Language, and Epistemology in " + + "Buddhist Tradition (Ph.d. dissertation. Religious Studies, University of " + + "Virginia, Charlottesville,VA 1991).

\n" + + "Gon-chok: Gon-chok-jik-may-wang-bo (Dkon mchog \'jigs med dbang po). " + + "Precious Garland of Tenets (Grub mtha\' rin chen phreng ba).

\n" + + "Jang.: Jang-gya (Lcang skya rol pa\'i rdo rje). " + + "Presentation of Tenets (Lcang skya grub mtha').

\n" + + "JKA: ??? {PH: see mngon sum (definition) }

\n" + + "KS: Khetsun Sangpo, Biographical Dictionary of Tibet and Tibetan Buddhism. " + + "(LTWA: Dharamsala, HP)

\n" + + "L: Lamotte, Etienne. Samdhinirmocana-sutra " + + "(Louvain: Universite de Louvain, 1935).

\n" + + "LAK: Jam-bel-sam-pel (\'Jam dpal bsam phel). Presentation of Awareness and " + + "Knowledge (Blo rig gi rnam bzhag).

\n" + + "Lati: Oral commentary by Lati Rinbochay.

\n" + + "LCh: Chandra, Lokesh. Tibetan-Sanskrit Dictionary (New Delhi, 1987).

\n" + + "LG: Losang Gyatso\'s Blo rig.

\n" + + "LM: ??? {PH: see skye bu chung ngu ... }

\n" + + "LR: Hopkins, Jeffrey. Glossary for Gsung rab kun gyi snying po lam rim gyi " + + "gtso bo rnam pa gsung gi khrid yid gzhan phan snying po (by Panchen Lama IV).

\n" + + "LSR: Tsul-trim-nam-gyel (Tshul khrims rnam rgyal). Presentation of Signs " + + "and Reasonings (Rtags rigs kyi rnam bzhag).

\n" + + "LWT: Lo-sang-gon-chok (Blo bzang dkon mchog). Word Commentary on the Root " + + "Text of [Jam-yang-shay-ba\'s] \"Tenets\".

\n" + + "ME: Hopkins, Jeffrey. Meditation on Emptiness (London, Wisdom, 1983).

\n" + + "MGP: ??? {PH: see bkag (examples) }

\n" + + "MSA: Nagao, Gadjin. Index to the Mahayanasutralankara (Tokyo: Nippon " + + "Gakujutsu Shinkvo-kai, 1958).

\n" + + "MSI: Dzong-ka-ba (Tsong kha pa blo bzang grags pa). Middling Exposition of " + + "Special Insight (Lhag mthong \'bring).

\n" + + "MV: Nagao, Gadjin. Index to the Madhyanta-vibhaga (Tokyo: 1961).

\n" + + "N: Zuiryu NAKAMURA. Index to the Ratnagotravibhaga-mahayanottaratantra-sastra " + + "(Tokyo, 1961).

\n" + + "P: Peking edition of the Tripitaka.

\n" + + "PGP: Lo-sang-da-yang (Blo bzang rta dbyangs). Presentation of the Grounds " + + "and Paths in Prasangika (Thal \'gyur pa\'i sa lam).

\n" + + "PP: Candrakirti. Prasannapada.

\n" + + "S: Samdhinirmocana-sutra (Tok Palace version, 160 pp., Leh, Ladakh: Shesrig " + + "Dpemzod, 1975-1980, vol. ja).

\n" + + "TAK: Pur-bu-jok (Phur bu lcog). Explanation of the Presentation of Objects " + + "and Object-Possessors as Well as Awareness and Knowledge (Yul dang yul can " + + "dang blo rig).

\n" + + "TCT: Pur-bu-jok (Phur bu lcog). Presentation of the Collected Topics (Yongs " + + "\'dzin bsdus grwa).

\n" + + "TGP: Nga-wang-bel-den (Ngag dbang dpal ldan). Treatise Illuminating the " + + "Presentation of the Four Great Secret Tantra Sets (Sngags kyi sa lam).

\n" + + "TN: Vasubandhu. Trisvabhavanirdesha.

\n" + + "VM: Bu-don-rin-chen-drup (bu ston rin chen grub), The Practice of " + + "(Jnandagarbha\'s) \"The Rite of the Vajra-Element Mandala: The Source of All " + + "Vajras\": A Precious Enhancer of Thought (rDo rje dbyings kyi dkyil \'khor gyi " + + "cho ga rdo rje thams cad \'byung ba zhes bya ba\'i lag len rin chen bsam \'phel), " + + "in Collected Works, Part 12 na. Lhasa: Zhol Printing House, 1990.

\n" + + "Y: Susumi YAMAGUCHI.Index to the Prasannapada Madhyamakavrtti. " + + "(Kyoto: Heirakuji-Shoten, 1974).

\n" + + "YT: Oral commentary by Yeshi Thupten."; + + protected SimplifiedLinkedList wordList; + + public TibetanScanner() + { + wordList = new SimplifiedLinkedList(); + } + + public void clearTokens() + { + wordList = new SimplifiedLinkedList(); + } + + public Token[] getTokenArray() + { + int n=wordList.size(); + if (n==0) return null; + Token token[] = new Token[n]; + SimplifiedListIterator li = wordList.listIterator(); + while(li.hasNext()) + token[--n] = (Token)li.next(); + return token; + } + + public SimplifiedLinkedList getTokenLinkedList() + { + return wordList; + } + + public Word[] getWordArray() + { + return getWordArray(true); + } + + public Word[] getWordArray(boolean includeRepeated) + { + Token token; + Word array[], word; + int n=0; + SimplifiedListIterator li = wordList.listIterator(); + SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList(); + + while(li.hasNext()) + { + token = (Token) li.next(); + + if (token instanceof Word) + { + ll.addLast(token); + } + } + + if (includeRepeated) + { + n = ll.size(); + if (n==0) return null; + + array = new Word[n]; + li = ll.listIterator(); + + n=0; + while (li.hasNext()) + { + array[n++] = (Word) li.next(); + } + } + else + { + ll2 = new SimplifiedLinkedList(); + li = ll.listIterator(); + + while(li.hasNext()) + { + word = (Word) li.next(); + if (!ll2.contains(word)) ll2.addLast(word); + } + + n = ll2.size(); + + if (n==0) return null; + + array = new Word[n]; + li = ll2.listIterator(); + + while (li.hasNext()) + { + array[--n] = (Word) li.next(); + } + } + return array; + } + + public abstract void scanLine(String linea); + public abstract void scanBody(String linea); + public abstract void finishUp(); + public abstract BitDictionarySource getDictionarySource(); + public abstract String[] getDictionaryDescriptions(); + public abstract void destroy(); +}