Updated the translation tool to accept Tibetan Unicode. For the application version, this means that the smart paste is Unicode-aware and performs the appropriate conversion. In the servlet version, Tibetan Unicode can now be entered in the form, and the Tibetan Machine Uni font is used to display the results.

The Manipulate class now includes acipToWylie, wylieToAcip, and unicodeToWylie. They provide a simple interface to David Chandler's converters, which are used by the translation tool.
This commit is contained in:
amontano 2006-04-24 06:09:17 +00:00
parent 2a47ff68a4
commit 67bddb7a7e
6 changed files with 851 additions and 717 deletions

View file

@ -1,20 +1,20 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import java.util.Enumeration; import java.util.Enumeration;
@ -26,7 +26,7 @@ import java.util.Vector;
@author Andrés Montano Pellegrini @author Andrés Montano Pellegrini
@see SyllableListTree @see SyllableListTree
*/ */
public class LocalTibetanScanner extends TibetanScanner public class LocalTibetanScanner extends TibetanScanner
{ {
public static String archivo; public static String archivo;
@ -307,7 +307,7 @@ public class LocalTibetanScanner extends TibetanScanner
return; return;
} }
outAHere: outAHere:
while(true) while(true)
{ {
doNotFinishUp=true; doNotFinishUp=true;

View file

@ -17,6 +17,10 @@ Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import org.thdl.tib.text.*;
import org.thdl.tib.text.reverter.*;
/** Miscellaneous static methods for the manipulation of Tibetan text. /** Miscellaneous static methods for the manipulation of Tibetan text.
@author Andrés Montano Pellegrini @author Andrés Montano Pellegrini
@ -24,7 +28,6 @@ package org.thdl.tib.scanner;
public class Manipulate public class Manipulate
{ {
private static String endOfParagraphMarks = "/;|!:^@#$%="; private static String endOfParagraphMarks = "/;|!:^@#$%=";
private static String bracketMarks = "<>(){}[]"; private static String bracketMarks = "<>(){}[]";
private static String endOfSyllableMarks = " _\t"; private static String endOfSyllableMarks = " _\t";
@ -179,69 +182,6 @@ public class Manipulate
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
} }
/** Converts a piece of Wylie transliteration into ACIP using hand-written
string rewriting.
A handful of exact inputs are mapped directly (or rejected with null);
anything else has the case of every letter swapped and is then passed
through an ordered series of substring replacements and fixWazur.
@param palabra the Wylie text to convert
@return the ACIP text, or null when the input is one of the forms this
method refuses to convert */
public static String wylieToAcip(String palabra)
{
// DLC FIXME: for unknown things, return null.
// Exact-match special cases: answered directly, before any rewriting.
if (palabra.equals("@##")) return "#";
if (palabra.equals("@#")) return "*";
if (palabra.equals("!")) return "`";
if (palabra.equals("b+h")) return "BH";
if (palabra.equals("d+h")) return "DH";
if (palabra.equals("X")) return null;
if (palabra.equals("iA")) return null;
if (palabra.equals("ai")) return "EE";
if (palabra.equals("au")) return "OO";
if (palabra.equals("$")) return null;
if (palabra.startsWith("@") || palabra.startsWith("#"))
return null; // we can't convert this in isolation! We need context.
char []caract;
int i, j, len;
String nuevaPalabra;
caract = palabra.toCharArray();
len = palabra.length();
// Swap the case of every letter; non-letters are left untouched.
for (j=0; j<len; j++)
{
i = j;
/*ciclo:
while(true) // to handle exceptions
{
switch(caract[i])
{
case 'A':
if (i>0)
{
i--;
break;
}
default:*/
if (Character.isLowerCase(caract[i]))
caract[i] = Character.toUpperCase(caract[i]);
else if (Character.isUpperCase(caract[i]))
caract[i] = Character.toLowerCase(caract[i]);
/* break ciclo;
}
}*/
}
nuevaPalabra = new String(caract);
// nuevaPalabra = palabra.toUpperCase();
// now make the Michael Roach changes
// NOTE(review): replace() is defined elsewhere in this class; presumably it
// substitutes every occurrence of the second argument with the third - confirm.
// The order of the calls below matters: "TQQ" is a temporary placeholder so
// that "TSH" and "TS" can be rewritten without clobbering each other.
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
// After the case swap, lowercase a/i/u are the letters that were uppercase
// in the input.
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
// Punctuation and spacing characters.
nuevaPalabra = replace(nuevaPalabra, "/", ",");
nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = replace(nuevaPalabra, "|", ";");
nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra;
}
/** If more than half of the letters among the first 10 characters /** If more than half of the letters among the first 10 characters
are uppercase, assume it is ACIP. */ are uppercase, assume it is ACIP. */
public static boolean guessIfAcip(String line) public static boolean guessIfAcip(String line)
@ -263,123 +203,28 @@ public class Manipulate
else return (letters / upperCase < 2); else return (letters / upperCase < 2);
} }
public static String acipToWylie(String linea) public static boolean isTibetanUnicodeCharacter(char ch)
{ {
char caract[], ch, chP, chN; return ch>=0xF00 && ch<=0xFFF;
String nuevaLinea;
int i, len;
boolean open;
caract = linea.toCharArray();
len = linea.length();
for (i=0; i<len; i++)
{
if (Character.isLowerCase(caract[i]))
caract[i] = Character.toUpperCase(caract[i]);
else if (Character.isUpperCase(caract[i]))
caract[i] = Character.toLowerCase(caract[i]);
}
nuevaLinea = new String(caract);
/* ahora hacer los cambios de Michael Roach ts -> tsh, tz -> ts, v -> w,
TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h,
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */
nuevaLinea = replace(nuevaLinea, "ts", "tq");
nuevaLinea = replace(nuevaLinea, "tz", "ts");
nuevaLinea = replace(nuevaLinea, "tq", "tsh");
nuevaLinea = replace(nuevaLinea, "v", "w");
nuevaLinea = replace(nuevaLinea, "TH", "Th");
nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
nuevaLinea = replace(nuevaLinea, ":", "H");
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
nuevaLinea = replace(nuevaLinea, "dzh", "dz+h");
nuevaLinea = replace(nuevaLinea, "aa", "a");
nuevaLinea = replace(nuevaLinea, "ai", "i");
nuevaLinea = replace(nuevaLinea, "aee", "ai");
nuevaLinea = replace(nuevaLinea, "au", "u");
nuevaLinea = replace(nuevaLinea, "aoo", "au");
nuevaLinea = replace(nuevaLinea, "ae", "e");
nuevaLinea = replace(nuevaLinea, "ao", "o");
nuevaLinea = replace(nuevaLinea, "ee", "ai");
nuevaLinea = replace(nuevaLinea, "oo", "au");
nuevaLinea = replace(nuevaLinea, "\'I", "\'q");
nuevaLinea = replace(nuevaLinea, "I", "-i");
nuevaLinea = replace(nuevaLinea, "\'q", "-I");
nuevaLinea = replace(nuevaLinea, "\\", "?");
nuevaLinea = replace(nuevaLinea, "`", "!");
nuevaLinea = replace(nuevaLinea, "ga-y", "g.y");
nuevaLinea = replace(nuevaLinea, "g-y", "g.y");
nuevaLinea = replace(nuevaLinea, "na-y", "n+y");
len = nuevaLinea.length();
for (i=0; i<len; i++)
{
ch = nuevaLinea.charAt(i);
switch(ch)
{
case '#':
nuevaLinea = nuevaLinea.substring(0,i) + "@##" + nuevaLinea.substring(i+1);
i+=3;
len+=2;
break;
case '*':
nuevaLinea = nuevaLinea.substring(0,i) + "@#" + nuevaLinea.substring(i+1);
i+=2;
len++;
break;
case '\'':
if (i>0 && i<len-1)
{
chP = nuevaLinea.charAt(i-1);
chN = nuevaLinea.charAt(i+1);
if (isVowel(chN))
{
if (Character.isLetter(chP) && !isVowel(chP))
{
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len--;
}
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
{
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len-=2;
}
}
}
}
} }
open = false; public static boolean guessIfUnicode(String line)
for (i=0; i<len; i++)
{ {
ch = nuevaLinea.charAt(i); char ch;
if (ch=='/') int letters=0, unicode=0, i, n;
n = line.length();
if (n>10) n = 10;
for (i=0; i<n; i++)
{ {
if (open) ch = line.charAt(i);
if (Character.isLetter(ch))
{ {
nuevaLinea = nuevaLinea.substring(0, i) + ")" + nuevaLinea.substring(i+1); letters++;
open = false; if (isTibetanUnicodeCharacter(ch)) unicode++;
}
else
{
nuevaLinea = nuevaLinea.substring(0, i) + "(" + nuevaLinea.substring(i+1);
open = true;
} }
} }
} if (letters==0 || unicode==0) return false;
nuevaLinea = replace(nuevaLinea, ",", "/"); else return (letters / unicode < 2);
return nuevaLinea;
} }
public static String fixWazur(String linea) public static String fixWazur(String linea)
@ -530,4 +375,274 @@ public class Manipulate
} }
if (psPalabras!=null) psPalabras.flush(); if (psPalabras!=null) psPalabras.flush();
}*/ }*/
/** Converts ACIP transliteration into Wylie by loading the text into a
scratch TibetanDocument and reading the document back out as Wylie.
@param acip the ACIP text to convert
@return the Wylie rendering of the document, or null if the text is
rejected with an InvalidTransliterationException */
public static String acipToWylie(String acip)
{
	TibetanDocument scratch = new TibetanDocument();
	boolean loaded;
	try
	{
		// First argument false: the same call is made with true for EWTS input
		// in unicodeToWylie, so false is taken to select ACIP parsing.
		TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, scratch, 0, false);
		loaded = true;
	}
	catch (InvalidTransliterationException e)
	{
		loaded = false;
	}
	if (!loaded)
	{
		return null;
	}
	// getWylie takes a one-element boolean array; whatever it stores there is not used.
	boolean[] ignoredFlag = { false };
	return scratch.getWylie(ignoredFlag);
}
/** Converts Wylie transliteration into ACIP by loading the text into a
scratch TibetanDocument and reading the document back out as ACIP.
@param wylie the Wylie (EWTS) text to convert
@return the ACIP rendering of the document, or null if the text is
rejected with an InvalidTransliterationException */
public static String wylieToAcip(String wylie)
{
	TibetanDocument tibDoc = new TibetanDocument();
	try
	{
		// The first argument selects the input transliteration scheme:
		// acipToWylie passes false for ACIP input and unicodeToWylie passes
		// true for EWTS input. The input here is Wylie, so it must be true;
		// passing false made the parser read the Wylie text as ACIP.
		// NOTE(review): confirm the flag's meaning against TibTextUtils.
		TibTextUtils.insertTibetanMachineWebForTranslit(true, wylie, tibDoc, 0, false);
	}
	catch (InvalidTransliterationException e)
	{
		// Callers of this class treat null as "could not convert".
		return null;
	}
	// getACIP takes a one-element boolean array; whatever it stores there is not used.
	return tibDoc.getACIP(new boolean[] { false });
}
/** Converts Tibetan Unicode text into Wylie. The text is first passed through
Converter.convertToEwtsForComputers, the result is loaded into a scratch
TibetanDocument, and the document is read back out as Wylie.
@param unicode the Tibetan Unicode text to convert
@return the Wylie rendering of the document, or null if the intermediate
transliteration is rejected with an InvalidTransliterationException */
public static String unicodeToWylie(String unicode)
{
	TibetanDocument scratch = new TibetanDocument();
	// The converter reports problems into this buffer; they are not inspected here.
	StringBuffer conversionErrors = new StringBuffer();
	String ewts = Converter.convertToEwtsForComputers(unicode, conversionErrors);
	boolean loaded;
	try
	{
		TibTextUtils.insertTibetanMachineWebForTranslit(true, ewts, scratch, 0, false);
		loaded = true;
	}
	catch (InvalidTransliterationException e)
	{
		loaded = false;
	}
	if (!loaded)
	{
		return null;
	}
	boolean[] ignoredFlag = { false };
	return scratch.getWylie(ignoredFlag);
}
/** Decodes HTML numeric character references ("&amp;#3904;", "&amp;#x0F40;")
found in a string into the characters they denote; all other text is copied
through unchanged.
Adapted from http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html
<ul>
<li>Decimal and hexadecimal references are accepted; the hexadecimal prefix
may be "x" or "X". Whitespace around the number is ignored.</li>
<li>A reference whose number cannot be parsed (including the empty reference
"&amp;#;"), is negative, or lies above U+10FFFF is replaced by '?'.</li>
<li>Code points above U+FFFF are emitted as a UTF-16 surrogate pair.</li>
<li>A trailing "&amp;#" with no closing ';' is copied through verbatim.</li>
</ul>
@param str the text to decode; must not be null
@return the decoded text */
public static String NCR2UnicodeString(String str)
{
	StringBuffer ostr = new StringBuffer(str.length());
	int pos = 0;
	while (pos < str.length())
	{
		int start = str.indexOf("&#", pos);
		if (start == -1)
		{
			// No more references: copy the tail and stop.
			ostr.append(str.substring(pos));
			break;
		}
		ostr.append(str.substring(pos, start));
		int end = str.indexOf(";", start);
		if (end == -1)
		{
			// Unterminated reference: keep it as literal text.
			ostr.append(str.substring(start));
			break;
		}
		// Trim once so that the prefix test and the parse see the same text.
		String tok = str.substring(start + 2, end).trim();
		int radix = 10;
		if (tok.length() > 0 && (tok.charAt(0) == 'x' || tok.charAt(0) == 'X'))
		{
			radix = 16;
			tok = tok.substring(1);
		}
		int codePoint;
		try
		{
			codePoint = Integer.parseInt(tok, radix);
		}
		catch (NumberFormatException exp)
		{
			codePoint = -1; // handled as invalid below
		}
		if (codePoint < 0 || codePoint > 0x10FFFF)
		{
			ostr.append('?');
		}
		else if (codePoint > 0xFFFF)
		{
			// Outside the BMP: split into a surrogate pair by hand rather
			// than truncating with a (char) cast.
			int offset = codePoint - 0x10000;
			ostr.append((char) (0xD800 + (offset >> 10)));
			ostr.append((char) (0xDC00 + (offset & 0x3FF)));
		}
		else
		{
			ostr.append((char) codePoint);
		}
		pos = end + 1;
	}
	return ostr.toString();
}
/** Encodes every character of a string as a decimal HTML numeric character
reference, e.g. "A" becomes "&amp;#65;".
A valid UTF-16 surrogate pair is combined and written as a single reference
to the code point it represents, because references to the individual
surrogate code points are not valid HTML. An unpaired surrogate is written
out as its own code unit value.
@param str the text to encode; must not be null
@return the text with every character replaced by its numeric reference */
public static String UnicodeString2NCR(String str)
{
	int len = str.length();
	StringBuffer ncr = new StringBuffer(len * 7);
	for (int i = 0; i < len; i++)
	{
		int code = str.charAt(i);
		if (code >= 0xD800 && code <= 0xDBFF && i + 1 < len)
		{
			int low = str.charAt(i + 1);
			if (low >= 0xDC00 && low <= 0xDFFF)
			{
				// Combine the high/low surrogates into one code point and
				// consume the low surrogate as well.
				code = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00);
				i++;
			}
		}
		ncr.append("&#").append(code).append(';');
	}
	return ncr.toString();
}
} }

View file

@ -1,20 +1,20 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
@ -35,7 +35,7 @@ import org.thdl.util.ThdlOptions;
by the browser. Requires no additional software installed on the client. by the browser. Requires no additional software installed on the client.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
*/ */
public class OnLineScannerFilter extends HttpServlet public class OnLineScannerFilter extends HttpServlet
{ {
private final static String propertyFile = "dictionary"; private final static String propertyFile = "dictionary";
@ -105,8 +105,8 @@ public class OnLineScannerFilter extends HttpServlet
{ {
out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>"); out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>");
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"); out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
out.println(" <script type=\"text/javascript\" src=\"/thdl/scripts/thdl_scripts.js\"></script>"); out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"/thdl/style/thdl-styles.css\"/>"); out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
} }
else else
out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>"); out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>");
@ -121,7 +121,7 @@ public class OnLineScannerFilter extends HttpServlet
script!=null && makes default roman script!=null && makes default roman
*/ */
wantsTibetan = (answer==null || answer.equals(tibetanStr)); wantsTibetan = (answer==null || answer.equals(tibetanStr));
if (wantsTibetan) /*if (wantsTibetan)
{ {
out.println("<style>.tmw {font: 28pt TibetanMachineWeb}"); out.println("<style>.tmw {font: 28pt TibetanMachineWeb}");
out.println(".tmw1 {font: 28pt TibetanMachineWeb1}"); out.println(".tmw1 {font: 28pt TibetanMachineWeb1}");
@ -134,7 +134,7 @@ public class OnLineScannerFilter extends HttpServlet
out.println(".tmw8 {font: 28pt TibetanMachineWeb8}"); out.println(".tmw8 {font: 28pt TibetanMachineWeb8}");
out.println(".tmw9 {font: 28pt TibetanMachineWeb9}"); out.println(".tmw9 {font: 28pt TibetanMachineWeb9}");
out.println("</style>"); out.println("</style>");
} }*/
out.println("</head>"); out.println("</head>");
out.println("<body>"); out.println("<body>");
@ -184,7 +184,7 @@ public class OnLineScannerFilter extends HttpServlet
out.println(" <td width=\"75%\">"); out.println(" <td width=\"75%\">");
out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" "); out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" ");
if (wantsTibetan) out.println("checked "); if (wantsTibetan) out.println("checked ");
out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://orion.lib.virginia.edu/thdl/tools/tmw.html\" target=\"_blank\">Tibetan Machine Web font</a>)<br/>"); out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://www.thdl.org/xml/show.php?xml=/tools/tibfonts.xml&l=uva10928423419921\" target=\"_blank\">Tibetan Machine Uni font</a>)<br/>");
out.println(" <input type=\"radio\" value=\"roman\" "); out.println(" <input type=\"radio\" value=\"roman\" ");
if (!wantsTibetan) out.println("checked "); if (!wantsTibetan) out.println("checked ");
out.println("name=\"" + scriptStr + "\">Roman script</td>"); out.println("name=\"" + scriptStr + "\">Roman script</td>");
@ -197,7 +197,7 @@ public class OnLineScannerFilter extends HttpServlet
ds = scanner.getDictionarySource(); ds = scanner.getDictionarySource();
ds.reset(); ds.reset();
checkedDicts = new boolean[dictionaries.length]; checkedDicts = new boolean[dictionaries.length];
/* out.println(" <tr>"); /* out.println(" <tr>");
out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/ out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/
out.println("<p>Search in dictionaries: "); out.println("<p>Search in dictionaries: ");
allUnchecked=true; allUnchecked=true;
@ -255,7 +255,9 @@ public class OnLineScannerFilter extends HttpServlet
out.println(" </tr>"); out.println(" </tr>");
out.println("</table>"); out.println("</table>");
out.println("<textarea rows=\"12\" name=\"parrafo\" cols=\"60\">"); out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
if (wantsTibetan) out.print(" class=\"tib\"");
out.println(">");
// Paragraph should be empty if the user just clicked the clear button // Paragraph should be empty if the user just clicked the clear button
answer = request.getParameter(buttonStr); answer = request.getParameter(buttonStr);
@ -314,6 +316,9 @@ public class OnLineScannerFilter extends HttpServlet
init = fin+1; init = fin+1;
} */ } */
scanner.clearTokens(); scanner.clearTokens();
in = Manipulate.NCR2UnicodeString(in);
if (Manipulate.guessIfUnicode(in)) in = Manipulate.unicodeToWylie(in);
else if (Manipulate.guessIfAcip(in)) in = Manipulate.acipToWylie(in);
scanner.scanBody(in); scanner.scanBody(in);
scanner.finishUp(); scanner.finishUp();
printText(pw, tibetan); printText(pw, tibetan);
@ -339,7 +344,7 @@ public class OnLineScannerFilter extends HttpServlet
{ {
word = new SwingWord((Word)words[i]); word = new SwingWord((Word)words[i]);
// if (word.getDefs().getDictionarySource()!=null) // if (word.getDefs().getDictionarySource()!=null)
pw.print(word.getLink()); pw.print(word.getLink(tibetan));
// else pw.print(word.getWylie() + " "); // else pw.print(word.getWylie() + " ");
} }
else else
@ -407,9 +412,10 @@ public class OnLineScannerFilter extends HttpServlet
} }
} }
pw.println(" <td width=\"20%\" rowspan=\"" + defs.def.length pw.print(" <td width=\"20%\" rowspan=\"" + defs.def.length
+ "\" valign=\"top\">" + word.getBookmark(tibetan) + "\" valign=\"top\"");
+ "</td>"); if (tibetan) pw.print(" class=\"tib\"");
pw.println(">" + word.getBookmark(tibetan) + "</td>");
pw.println(" <td width=\"12%\">" + tag + "</td>"); pw.println(" <td width=\"12%\">" + tag + "</td>");
pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>"); pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>");

View file

@ -1,20 +1,20 @@
/* /*
The contents of this file are subject to the AMP Open Community License The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/). (http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis, Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved. Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
@ -29,6 +29,7 @@ import javax.swing.text.BadLocationException;
import org.thdl.tib.input.DuffPane; import org.thdl.tib.input.DuffPane;
import org.thdl.tib.text.TibetanDocument; import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.reverter.*;
import org.thdl.util.RTFFixerInputStream; import org.thdl.util.RTFFixerInputStream;
import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions; import org.thdl.util.ThdlOptions;
@ -39,7 +40,7 @@ import org.thdl.util.ThdlOptions;
accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted, accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted,
it is converted to TibetanMachineWeb. Any other font is assumed to be it is converted to TibetanMachineWeb. Any other font is assumed to be
Roman script. Roman script.
*/ */
public class StrictDuffPane extends DuffPane public class StrictDuffPane extends DuffPane
{ {
public StrictDuffPane() public StrictDuffPane()
@ -54,6 +55,8 @@ public class StrictDuffPane extends DuffPane
*/ */
public void paste(int offset) public void paste(int offset)
{ {
boolean pasteAsString = false;
// Respect setEditable(boolean): // Respect setEditable(boolean):
if (!this.isEditable()) if (!this.isEditable())
return; return;
@ -90,9 +93,7 @@ public class StrictDuffPane extends DuffPane
*/ */
if (contents.isDataFlavorSupported(DataFlavor.stringFlavor)) if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{ {
String data = (String)contents.getTransferData(DataFlavor.stringFlavor); pasteAsString = true;
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
toTibetanMachineWeb(data, offset);
} }
// JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop."); // JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop.");
} }
@ -105,9 +106,7 @@ public class StrictDuffPane extends DuffPane
if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine") if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine")
&& contents.isDataFlavorSupported(DataFlavor.stringFlavor)) && contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{ {
String data = (String)contents.getTransferData(DataFlavor.stringFlavor); pasteAsString = true;
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
toTibetanMachineWeb(data, offset);
} }
else else
{ {
@ -137,10 +136,20 @@ public class StrictDuffPane extends DuffPane
else if (contents.isDataFlavorSupported(DataFlavor.stringFlavor)) else if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
{ {
// if it is not in a font, assume it is wylie or ACIP. // if it is not in a font, assume it is wylie or ACIP.
pasteAsString = true;
}
if (pasteAsString)
{
String data = (String)contents.getTransferData(DataFlavor.stringFlavor); String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); if (Manipulate.guessIfUnicode(data))
{
StringBuffer errors = new StringBuffer();
data = Converter.convertToEwtsForComputers(data, errors);
} else if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
toTibetanMachineWeb(data, offset); toTibetanMachineWeb(data, offset);
} }
} catch (UnsupportedFlavorException ufe) { } catch (UnsupportedFlavorException ufe) {
ufe.printStackTrace(); ufe.printStackTrace();
ThdlDebug.noteIffyCode(); ThdlDebug.noteIffyCode();

View file

@ -20,7 +20,8 @@ Contributor(s): ______________________________________.
to store the dictionary. */ to store the dictionary. */
package org.thdl.tib.scanner; package org.thdl.tib.scanner;
import org.thdl.tib.text.TibetanHTML; //import org.thdl.tib.text.TibetanHTML;
import org.thdl.tib.text.ttt.*;
/** Tibetan word with its corresponding definitions. /** Tibetan word with its corresponding definitions.
@ -60,7 +61,8 @@ public class SwingWord extends Word
{ {
try try
{ {
localWord = TibetanHTML.getHTML(super.token + " "); // localWord = TibetanHTML.getHTML(super.token + " ");
localWord = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(super.token + " "));
} }
catch (Exception e) catch (Exception e)
{ {
@ -82,6 +84,7 @@ public class SwingWord extends Word
public String getLink(boolean tibetan) public String getLink(boolean tibetan)
{ {
String localWord, result=null; String localWord, result=null;
String className = "";
if (wordSinDec==null) localWord = super.token; if (wordSinDec==null) localWord = super.token;
else localWord = wordSinDec; else localWord = wordSinDec;
@ -89,7 +92,8 @@ public class SwingWord extends Word
{ {
try try
{ {
result = TibetanHTML.getHTML(localWord + " "); result = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(localWord + " "));
className = " class = \"tib\"";
} }
catch (Exception e) catch (Exception e)
{ {
@ -101,6 +105,6 @@ public class SwingWord extends Word
if (tibetan) result+= "</a>"; if (tibetan) result+= "</a>";
else result+= "</a> "; else result+= "</a> ";
return result;*/ return result;*/
return "<a href=\"#" + super.token + "\">" + result + "</a> "; return "<a href=\"#" + super.token + "\"" + className + ">" + result + "</a> ";
} }
} }

View file

@ -27,7 +27,7 @@ import org.thdl.util.ThdlVersion;
*/ */
public abstract class TibetanScanner public abstract class TibetanScanner
{ {
public static final String version = "The Tibetan to English Translation Tool, version 3.2.1 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved."; public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2005 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>"; public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2005 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";