Updated the translation tool to accept Tibetan Unicode. For the application version, this means that the smart paste is Unicode-aware and will do the appropriate conversion. In the servlet version, Tibetan Unicode can now be entered in the form, and the Tibetan Machine Uni font is used to display the results.
The Manipulate class now includes acipToWylie, wylieToAcip, and unicodeToWylie. They provide a simple interface to David Chandler's converters, which are used by the translation tool.
This commit is contained in:
parent
2a47ff68a4
commit
67bddb7a7e
6 changed files with 851 additions and 717 deletions
|
@ -1,20 +1,20 @@
|
|||
/*
|
||||
The contents of this file are subject to the AMP Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the AMP web site
|
||||
(http://www.tibet.iteso.mx/Guatemala/).
|
||||
The contents of this file are subject to the AMP Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the AMP web site
|
||||
(http://www.tibet.iteso.mx/Guatemala/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
||||
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||
Pellegrini. All Rights Reserved.
|
||||
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
||||
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||
Pellegrini. All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.scanner;
|
||||
import java.util.Enumeration;
|
||||
|
@ -26,7 +26,7 @@ import java.util.Vector;
|
|||
|
||||
@author Andrés Montano Pellegrini
|
||||
@see SyllableListTree
|
||||
*/
|
||||
*/
|
||||
public class LocalTibetanScanner extends TibetanScanner
|
||||
{
|
||||
public static String archivo;
|
||||
|
@ -307,7 +307,7 @@ public class LocalTibetanScanner extends TibetanScanner
|
|||
return;
|
||||
}
|
||||
|
||||
outAHere:
|
||||
outAHere:
|
||||
while(true)
|
||||
{
|
||||
doNotFinishUp=true;
|
||||
|
|
|
@ -17,6 +17,10 @@ Contributor(s): ______________________________________.
|
|||
*/
|
||||
package org.thdl.tib.scanner;
|
||||
|
||||
import org.thdl.tib.text.*;
|
||||
import org.thdl.tib.text.reverter.*;
|
||||
|
||||
|
||||
/** Miscellaneous static methods for the manipulation of Tibetan text.
|
||||
|
||||
@author Andrés Montano Pellegrini
|
||||
|
@ -24,7 +28,6 @@ package org.thdl.tib.scanner;
|
|||
|
||||
public class Manipulate
|
||||
{
|
||||
|
||||
private static String endOfParagraphMarks = "/;|!:^@#$%=";
|
||||
private static String bracketMarks = "<>(){}[]";
|
||||
private static String endOfSyllableMarks = " _\t";
|
||||
|
@ -179,69 +182,6 @@ public class Manipulate
|
|||
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
|
||||
}
|
||||
|
||||
public static String wylieToAcip(String palabra)
|
||||
{
|
||||
// DLC FIXME: for unknown things, return null.
|
||||
if (palabra.equals("@##")) return "#";
|
||||
if (palabra.equals("@#")) return "*";
|
||||
if (palabra.equals("!")) return "`";
|
||||
if (palabra.equals("b+h")) return "BH";
|
||||
if (palabra.equals("d+h")) return "DH";
|
||||
if (palabra.equals("X")) return null;
|
||||
if (palabra.equals("iA")) return null;
|
||||
if (palabra.equals("ai")) return "EE";
|
||||
if (palabra.equals("au")) return "OO";
|
||||
if (palabra.equals("$")) return null;
|
||||
if (palabra.startsWith("@") || palabra.startsWith("#"))
|
||||
return null; // we can't convert this in isolation! We need context.
|
||||
char []caract;
|
||||
int i, j, len;
|
||||
String nuevaPalabra;
|
||||
|
||||
caract = palabra.toCharArray();
|
||||
len = palabra.length();
|
||||
for (j=0; j<len; j++)
|
||||
{
|
||||
i = j;
|
||||
/*ciclo:
|
||||
while(true) // para manejar excepciones; que honda!
|
||||
{
|
||||
switch(caract[i])
|
||||
{
|
||||
case 'A':
|
||||
if (i>0)
|
||||
{
|
||||
i--;
|
||||
break;
|
||||
}
|
||||
default:*/
|
||||
if (Character.isLowerCase(caract[i]))
|
||||
caract[i] = Character.toUpperCase(caract[i]);
|
||||
else if (Character.isUpperCase(caract[i]))
|
||||
caract[i] = Character.toLowerCase(caract[i]);
|
||||
/* break ciclo;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
nuevaPalabra = new String(caract);
|
||||
// nuevaPalabra = palabra.toUpperCase();
|
||||
|
||||
// ahora hacer los cambios de Michael Roach
|
||||
|
||||
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
|
||||
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
|
||||
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
|
||||
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
|
||||
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
|
||||
nuevaPalabra = replace(nuevaPalabra, "/", ",");
|
||||
nuevaPalabra = replace(nuevaPalabra, "_", " ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "|", ";");
|
||||
nuevaPalabra = fixWazur(nuevaPalabra);
|
||||
return nuevaPalabra;
|
||||
}
|
||||
|
||||
/** If more than half of the letters among the first 10 characters
|
||||
are uppercase, assume it is ACIP. */
|
||||
public static boolean guessIfAcip(String line)
|
||||
|
@ -263,123 +203,28 @@ public class Manipulate
|
|||
else return (letters / upperCase < 2);
|
||||
}
|
||||
|
||||
public static String acipToWylie(String linea)
|
||||
public static boolean isTibetanUnicodeCharacter(char ch)
|
||||
{
|
||||
char caract[], ch, chP, chN;
|
||||
String nuevaLinea;
|
||||
int i, len;
|
||||
boolean open;
|
||||
|
||||
caract = linea.toCharArray();
|
||||
len = linea.length();
|
||||
for (i=0; i<len; i++)
|
||||
{
|
||||
if (Character.isLowerCase(caract[i]))
|
||||
caract[i] = Character.toUpperCase(caract[i]);
|
||||
else if (Character.isUpperCase(caract[i]))
|
||||
caract[i] = Character.toLowerCase(caract[i]);
|
||||
}
|
||||
nuevaLinea = new String(caract);
|
||||
|
||||
/* ahora hacer los cambios de Michael Roach ts -> tsh, tz -> ts, v -> w,
|
||||
TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h,
|
||||
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
|
||||
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
|
||||
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
|
||||
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */
|
||||
|
||||
nuevaLinea = replace(nuevaLinea, "ts", "tq");
|
||||
nuevaLinea = replace(nuevaLinea, "tz", "ts");
|
||||
nuevaLinea = replace(nuevaLinea, "tq", "tsh");
|
||||
nuevaLinea = replace(nuevaLinea, "v", "w");
|
||||
nuevaLinea = replace(nuevaLinea, "TH", "Th");
|
||||
nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh");
|
||||
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
|
||||
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
|
||||
nuevaLinea = replace(nuevaLinea, ":", "H");
|
||||
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
|
||||
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
|
||||
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
|
||||
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
|
||||
nuevaLinea = replace(nuevaLinea, "dzh", "dz+h");
|
||||
nuevaLinea = replace(nuevaLinea, "aa", "a");
|
||||
nuevaLinea = replace(nuevaLinea, "ai", "i");
|
||||
nuevaLinea = replace(nuevaLinea, "aee", "ai");
|
||||
nuevaLinea = replace(nuevaLinea, "au", "u");
|
||||
nuevaLinea = replace(nuevaLinea, "aoo", "au");
|
||||
nuevaLinea = replace(nuevaLinea, "ae", "e");
|
||||
nuevaLinea = replace(nuevaLinea, "ao", "o");
|
||||
nuevaLinea = replace(nuevaLinea, "ee", "ai");
|
||||
nuevaLinea = replace(nuevaLinea, "oo", "au");
|
||||
nuevaLinea = replace(nuevaLinea, "\'I", "\'q");
|
||||
nuevaLinea = replace(nuevaLinea, "I", "-i");
|
||||
nuevaLinea = replace(nuevaLinea, "\'q", "-I");
|
||||
nuevaLinea = replace(nuevaLinea, "\\", "?");
|
||||
nuevaLinea = replace(nuevaLinea, "`", "!");
|
||||
nuevaLinea = replace(nuevaLinea, "ga-y", "g.y");
|
||||
nuevaLinea = replace(nuevaLinea, "g-y", "g.y");
|
||||
nuevaLinea = replace(nuevaLinea, "na-y", "n+y");
|
||||
|
||||
len = nuevaLinea.length();
|
||||
for (i=0; i<len; i++)
|
||||
{
|
||||
ch = nuevaLinea.charAt(i);
|
||||
switch(ch)
|
||||
{
|
||||
case '#':
|
||||
nuevaLinea = nuevaLinea.substring(0,i) + "@##" + nuevaLinea.substring(i+1);
|
||||
i+=3;
|
||||
len+=2;
|
||||
break;
|
||||
case '*':
|
||||
nuevaLinea = nuevaLinea.substring(0,i) + "@#" + nuevaLinea.substring(i+1);
|
||||
i+=2;
|
||||
len++;
|
||||
break;
|
||||
case '\'':
|
||||
if (i>0 && i<len-1)
|
||||
{
|
||||
chP = nuevaLinea.charAt(i-1);
|
||||
chN = nuevaLinea.charAt(i+1);
|
||||
if (isVowel(chN))
|
||||
{
|
||||
if (Character.isLetter(chP) && !isVowel(chP))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||
len--;
|
||||
}
|
||||
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||
len-=2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ch>=0xF00 && ch<=0xFFF;
|
||||
}
|
||||
|
||||
open = false;
|
||||
for (i=0; i<len; i++)
|
||||
public static boolean guessIfUnicode(String line)
|
||||
{
|
||||
ch = nuevaLinea.charAt(i);
|
||||
if (ch=='/')
|
||||
char ch;
|
||||
int letters=0, unicode=0, i, n;
|
||||
n = line.length();
|
||||
if (n>10) n = 10;
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
if (open)
|
||||
ch = line.charAt(i);
|
||||
if (Character.isLetter(ch))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + ")" + nuevaLinea.substring(i+1);
|
||||
open = false;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + "(" + nuevaLinea.substring(i+1);
|
||||
open = true;
|
||||
letters++;
|
||||
if (isTibetanUnicodeCharacter(ch)) unicode++;
|
||||
}
|
||||
}
|
||||
}
|
||||
nuevaLinea = replace(nuevaLinea, ",", "/");
|
||||
|
||||
return nuevaLinea;
|
||||
if (letters==0 || unicode==0) return false;
|
||||
else return (letters / unicode < 2);
|
||||
}
|
||||
|
||||
public static String fixWazur(String linea)
|
||||
|
@ -530,4 +375,274 @@ public class Manipulate
|
|||
}
|
||||
if (psPalabras!=null) psPalabras.flush();
|
||||
}*/
|
||||
|
||||
public static String acipToWylie(String acip)
|
||||
{
|
||||
TibetanDocument tibDoc = new TibetanDocument();
|
||||
try
|
||||
{
|
||||
TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, tibDoc, 0, false);
|
||||
}
|
||||
catch (InvalidTransliterationException e)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
return tibDoc.getWylie(new boolean[] { false });
|
||||
|
||||
/* char caract[], ch, chP, chN;
|
||||
String nuevaLinea;
|
||||
int i, len;
|
||||
boolean open;
|
||||
|
||||
caract = acip.toCharArray();
|
||||
len = acip.length();
|
||||
for (i=0; i<len; i++)
|
||||
{
|
||||
if (Character.isLowerCase(caract[i]))
|
||||
caract[i] = Character.toUpperCase(caract[i]);
|
||||
else if (Character.isUpperCase(caract[i]))
|
||||
caract[i] = Character.toLowerCase(caract[i]);
|
||||
}
|
||||
nuevaLinea = new String(caract);
|
||||
|
||||
/* ahora hacer los cambios de Michael Roach ts -> tsh, tz -> ts, v -> w,
|
||||
TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h,
|
||||
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
|
||||
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
|
||||
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
|
||||
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y
|
||||
|
||||
nuevaLinea = replace(nuevaLinea, "ts", "tq");
|
||||
nuevaLinea = replace(nuevaLinea, "tz", "ts");
|
||||
nuevaLinea = replace(nuevaLinea, "tq", "tsh");
|
||||
nuevaLinea = replace(nuevaLinea, "v", "w");
|
||||
nuevaLinea = replace(nuevaLinea, "TH", "Th");
|
||||
nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh");
|
||||
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
|
||||
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
|
||||
nuevaLinea = replace(nuevaLinea, ":", "H");
|
||||
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
|
||||
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
|
||||
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
|
||||
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
|
||||
nuevaLinea = replace(nuevaLinea, "dzh", "dz+h");
|
||||
nuevaLinea = replace(nuevaLinea, "aa", "a");
|
||||
nuevaLinea = replace(nuevaLinea, "ai", "i");
|
||||
nuevaLinea = replace(nuevaLinea, "aee", "ai");
|
||||
nuevaLinea = replace(nuevaLinea, "au", "u");
|
||||
nuevaLinea = replace(nuevaLinea, "aoo", "au");
|
||||
nuevaLinea = replace(nuevaLinea, "ae", "e");
|
||||
nuevaLinea = replace(nuevaLinea, "ao", "o");
|
||||
nuevaLinea = replace(nuevaLinea, "ee", "ai");
|
||||
nuevaLinea = replace(nuevaLinea, "oo", "au");
|
||||
nuevaLinea = replace(nuevaLinea, "\'I", "\'q");
|
||||
nuevaLinea = replace(nuevaLinea, "I", "-i");
|
||||
nuevaLinea = replace(nuevaLinea, "\'q", "-I");
|
||||
nuevaLinea = replace(nuevaLinea, "\\", "?");
|
||||
nuevaLinea = replace(nuevaLinea, "`", "!");
|
||||
nuevaLinea = replace(nuevaLinea, "ga-y", "g.y");
|
||||
nuevaLinea = replace(nuevaLinea, "g-y", "g.y");
|
||||
nuevaLinea = replace(nuevaLinea, "na-y", "n+y");
|
||||
|
||||
len = nuevaLinea.length();
|
||||
for (i=0; i<len; i++)
|
||||
{
|
||||
ch = nuevaLinea.charAt(i);
|
||||
switch(ch)
|
||||
{
|
||||
case '#':
|
||||
nuevaLinea = nuevaLinea.substring(0,i) + "@##" + nuevaLinea.substring(i+1);
|
||||
i+=3;
|
||||
len+=2;
|
||||
break;
|
||||
case '*':
|
||||
nuevaLinea = nuevaLinea.substring(0,i) + "@#" + nuevaLinea.substring(i+1);
|
||||
i+=2;
|
||||
len++;
|
||||
break;
|
||||
case '\'':
|
||||
if (i>0 && i<len-1)
|
||||
{
|
||||
chP = nuevaLinea.charAt(i-1);
|
||||
chN = nuevaLinea.charAt(i+1);
|
||||
if (isVowel(chN))
|
||||
{
|
||||
if (Character.isLetter(chP) && !isVowel(chP))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||
len--;
|
||||
}
|
||||
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||
len-=2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
open = false;
|
||||
for (i=0; i<len; i++)
|
||||
{
|
||||
ch = nuevaLinea.charAt(i);
|
||||
if (ch=='/')
|
||||
{
|
||||
if (open)
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + ")" + nuevaLinea.substring(i+1);
|
||||
open = false;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + "(" + nuevaLinea.substring(i+1);
|
||||
open = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
nuevaLinea = replace(nuevaLinea, ",", "/");
|
||||
|
||||
return nuevaLinea; */
|
||||
}
|
||||
|
||||
public static String wylieToAcip(String wylie)
|
||||
{
|
||||
TibetanDocument tibDoc = new TibetanDocument();
|
||||
try
|
||||
{
|
||||
TibTextUtils.insertTibetanMachineWebForTranslit(false, wylie, tibDoc, 0, false);
|
||||
}
|
||||
catch (InvalidTransliterationException e)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
return tibDoc.getACIP(new boolean[] { false });
|
||||
|
||||
/* DLC FIXME: for unknown things, return null.
|
||||
if (wylie.equals("@##")) return "#";
|
||||
if (wylie.equals("@#")) return "*";
|
||||
if (wylie.equals("!")) return "`";
|
||||
if (wylie.equals("b+h")) return "BH";
|
||||
if (wylie.equals("d+h")) return "DH";
|
||||
if (wylie.equals("X")) return null;
|
||||
if (wylie.equals("iA")) return null;
|
||||
if (wylie.equals("ai")) return "EE";
|
||||
if (wylie.equals("au")) return "OO";
|
||||
if (wylie.equals("$")) return null;
|
||||
if (wylie.startsWith("@") || wylie.startsWith("#"))
|
||||
return null; // we can't convert this in isolation! We need context.
|
||||
char []caract;
|
||||
int i, j, len;
|
||||
String nuevaPalabra;
|
||||
|
||||
caract = wylie.toCharArray();
|
||||
len = wylie.length();
|
||||
for (j=0; j<len; j++)
|
||||
{
|
||||
i = j;
|
||||
/*ciclo:
|
||||
while(true) // para manejar excepciones; que honda!
|
||||
{
|
||||
switch(caract[i])
|
||||
{
|
||||
case 'A':
|
||||
if (i>0)
|
||||
{
|
||||
i--;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (Character.isLowerCase(caract[i]))
|
||||
caract[i] = Character.toUpperCase(caract[i]);
|
||||
else if (Character.isUpperCase(caract[i]))
|
||||
caract[i] = Character.toLowerCase(caract[i]);
|
||||
/* break ciclo;
|
||||
}
|
||||
}
|
||||
}
|
||||
nuevaPalabra = new String(caract);
|
||||
// nuevaPalabra = palabra.toUpperCase();
|
||||
|
||||
// ahora hacer los cambios de Michael Roach
|
||||
|
||||
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
|
||||
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
|
||||
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
|
||||
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
|
||||
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
|
||||
nuevaPalabra = replace(nuevaPalabra, "/", ",");
|
||||
nuevaPalabra = replace(nuevaPalabra, "_", " ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "|", ";");
|
||||
nuevaPalabra = fixWazur(nuevaPalabra);
|
||||
return nuevaPalabra; */
|
||||
}
|
||||
|
||||
public static String unicodeToWylie(String unicode)
|
||||
{
|
||||
String machineWylie;
|
||||
TibetanDocument tibDoc = new TibetanDocument();
|
||||
StringBuffer errors = new StringBuffer();
|
||||
|
||||
machineWylie = Converter.convertToEwtsForComputers(unicode, errors);
|
||||
try
|
||||
{
|
||||
TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false);
|
||||
}
|
||||
catch (InvalidTransliterationException e)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
return tibDoc.getWylie(new boolean[] { false });
|
||||
}
|
||||
|
||||
/** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */
|
||||
public static String NCR2UnicodeString(String str)
|
||||
{
|
||||
StringBuffer ostr = new StringBuffer();
|
||||
int i1=0;
|
||||
int i2=0;
|
||||
|
||||
while(i2<str.length())
|
||||
{
|
||||
i1 = str.indexOf("&#",i2);
|
||||
if (i1 == -1 ) {
|
||||
ostr.append(str.substring(i2, str.length()));
|
||||
break ;
|
||||
}
|
||||
ostr.append(str.substring(i2, i1));
|
||||
i2 = str.indexOf(";", i1);
|
||||
if (i2 == -1 ) {
|
||||
ostr.append(str.substring(i1, str.length()));
|
||||
break ;
|
||||
}
|
||||
|
||||
String tok = str.substring(i1+2, i2);
|
||||
try {
|
||||
int radix = 10 ;
|
||||
if (tok.trim().charAt(0) == 'x') {
|
||||
radix = 16 ;
|
||||
tok = tok.substring(1,tok.length());
|
||||
}
|
||||
ostr.append((char) Integer.parseInt(tok, radix));
|
||||
} catch (NumberFormatException exp) {
|
||||
ostr.append('?') ;
|
||||
}
|
||||
i2++ ;
|
||||
}
|
||||
return new String(ostr) ;
|
||||
}
|
||||
|
||||
public static String UnicodeString2NCR(String str)
|
||||
{
|
||||
StringBuffer ncr = new StringBuffer();
|
||||
int i;
|
||||
for (i=0; i<str.length(); i++)
|
||||
{
|
||||
ncr.append("&#" + Integer.toString(str.charAt(i)) + ";");
|
||||
}
|
||||
return ncr.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,20 +1,20 @@
|
|||
/*
|
||||
The contents of this file are subject to the AMP Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the AMP web site
|
||||
(http://www.tibet.iteso.mx/Guatemala/).
|
||||
The contents of this file are subject to the AMP Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the AMP web site
|
||||
(http://www.tibet.iteso.mx/Guatemala/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
||||
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||
Pellegrini. All Rights Reserved.
|
||||
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
||||
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||
Pellegrini. All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.scanner;
|
||||
|
||||
|
@ -35,7 +35,7 @@ import org.thdl.util.ThdlOptions;
|
|||
by the browser. Requires no additional software installed on the client.
|
||||
|
||||
@author Andrés Montano Pellegrini
|
||||
*/
|
||||
*/
|
||||
public class OnLineScannerFilter extends HttpServlet
|
||||
{
|
||||
private final static String propertyFile = "dictionary";
|
||||
|
@ -105,8 +105,8 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
{
|
||||
out.println(" <title>Tibetan and Himalayan Digital Library - The Online Tibetan to English Translation/Dictionary Tool</title>");
|
||||
out.println(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />");
|
||||
out.println(" <script type=\"text/javascript\" src=\"/thdl/scripts/thdl_scripts.js\"></script>");
|
||||
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"/thdl/style/thdl-styles.css\"/>");
|
||||
out.println(" <script type=\"text/javascript\" src=\"http://www.thdl.org/scripts/thdl_scripts.js\"></script>");
|
||||
out.println(" <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.thdl.org/style/thdl-styles.css\"/>");
|
||||
}
|
||||
else
|
||||
out.println(" <title>The Online Tibetan to English Translation/Dictionary Tool</title>");
|
||||
|
@ -121,7 +121,7 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
script!=null && makes default roman
|
||||
*/
|
||||
wantsTibetan = (answer==null || answer.equals(tibetanStr));
|
||||
if (wantsTibetan)
|
||||
/*if (wantsTibetan)
|
||||
{
|
||||
out.println("<style>.tmw {font: 28pt TibetanMachineWeb}");
|
||||
out.println(".tmw1 {font: 28pt TibetanMachineWeb1}");
|
||||
|
@ -134,7 +134,7 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
out.println(".tmw8 {font: 28pt TibetanMachineWeb8}");
|
||||
out.println(".tmw9 {font: 28pt TibetanMachineWeb9}");
|
||||
out.println("</style>");
|
||||
}
|
||||
}*/
|
||||
out.println("</head>");
|
||||
out.println("<body>");
|
||||
|
||||
|
@ -184,7 +184,7 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
out.println(" <td width=\"75%\">");
|
||||
out.println(" <p><input type=\"radio\" value=\"" + tibetanStr + "\" ");
|
||||
if (wantsTibetan) out.println("checked ");
|
||||
out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://orion.lib.virginia.edu/thdl/tools/tmw.html\" target=\"_blank\">Tibetan Machine Web font</a>)<br/>");
|
||||
out.println("name=\"" + scriptStr + "\">Tibetan script (using <a href=\"http://www.thdl.org/xml/show.php?xml=/tools/tibfonts.xml&l=uva10928423419921\" target=\"_blank\">Tibetan Machine Uni font</a>)<br/>");
|
||||
out.println(" <input type=\"radio\" value=\"roman\" ");
|
||||
if (!wantsTibetan) out.println("checked ");
|
||||
out.println("name=\"" + scriptStr + "\">Roman script</td>");
|
||||
|
@ -197,7 +197,7 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
ds = scanner.getDictionarySource();
|
||||
ds.reset();
|
||||
checkedDicts = new boolean[dictionaries.length];
|
||||
/* out.println(" <tr>");
|
||||
/* out.println(" <tr>");
|
||||
out.println("<td width=\""+ percent +"%\">Search in dictionaries:</td>");*/
|
||||
out.println("<p>Search in dictionaries: ");
|
||||
allUnchecked=true;
|
||||
|
@ -255,7 +255,9 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
out.println(" </tr>");
|
||||
out.println("</table>");
|
||||
|
||||
out.println("<textarea rows=\"12\" name=\"parrafo\" cols=\"60\">");
|
||||
out.print("<textarea rows=\"5\" name=\"parrafo\" cols=\"40\"");
|
||||
if (wantsTibetan) out.print(" class=\"tib\"");
|
||||
out.println(">");
|
||||
|
||||
// Paragraph should be empty if the user just clicked the clear button
|
||||
answer = request.getParameter(buttonStr);
|
||||
|
@ -314,6 +316,9 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
init = fin+1;
|
||||
} */
|
||||
scanner.clearTokens();
|
||||
in = Manipulate.NCR2UnicodeString(in);
|
||||
if (Manipulate.guessIfUnicode(in)) in = Manipulate.unicodeToWylie(in);
|
||||
else if (Manipulate.guessIfAcip(in)) in = Manipulate.acipToWylie(in);
|
||||
scanner.scanBody(in);
|
||||
scanner.finishUp();
|
||||
printText(pw, tibetan);
|
||||
|
@ -339,7 +344,7 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
{
|
||||
word = new SwingWord((Word)words[i]);
|
||||
// if (word.getDefs().getDictionarySource()!=null)
|
||||
pw.print(word.getLink());
|
||||
pw.print(word.getLink(tibetan));
|
||||
// else pw.print(word.getWylie() + " ");
|
||||
}
|
||||
else
|
||||
|
@ -407,9 +412,10 @@ public class OnLineScannerFilter extends HttpServlet
|
|||
}
|
||||
}
|
||||
|
||||
pw.println(" <td width=\"20%\" rowspan=\"" + defs.def.length
|
||||
+ "\" valign=\"top\">" + word.getBookmark(tibetan)
|
||||
+ "</td>");
|
||||
pw.print(" <td width=\"20%\" rowspan=\"" + defs.def.length
|
||||
+ "\" valign=\"top\"");
|
||||
if (tibetan) pw.print(" class=\"tib\"");
|
||||
pw.println(">" + word.getBookmark(tibetan) + "</td>");
|
||||
pw.println(" <td width=\"12%\">" + tag + "</td>");
|
||||
pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>");
|
||||
|
||||
|
|
|
@ -1,20 +1,20 @@
|
|||
/*
|
||||
The contents of this file are subject to the AMP Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the AMP web site
|
||||
(http://www.tibet.iteso.mx/Guatemala/).
|
||||
The contents of this file are subject to the AMP Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the AMP web site
|
||||
(http://www.tibet.iteso.mx/Guatemala/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
||||
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||
Pellegrini. All Rights Reserved.
|
||||
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
||||
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||
Pellegrini. All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.scanner;
|
||||
|
||||
|
@ -29,6 +29,7 @@ import javax.swing.text.BadLocationException;
|
|||
|
||||
import org.thdl.tib.input.DuffPane;
|
||||
import org.thdl.tib.text.TibetanDocument;
|
||||
import org.thdl.tib.text.reverter.*;
|
||||
import org.thdl.util.RTFFixerInputStream;
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.util.ThdlOptions;
|
||||
|
@ -39,7 +40,7 @@ import org.thdl.util.ThdlOptions;
|
|||
accordingly to TibetanMachineWeb. If text in TibetanMachine is pasted,
|
||||
it is converted to TibetanMachineWeb. Any other font is assumed to be
|
||||
Roman script.
|
||||
*/
|
||||
*/
|
||||
public class StrictDuffPane extends DuffPane
|
||||
{
|
||||
public StrictDuffPane()
|
||||
|
@ -54,6 +55,8 @@ public class StrictDuffPane extends DuffPane
|
|||
*/
|
||||
public void paste(int offset)
|
||||
{
|
||||
boolean pasteAsString = false;
|
||||
|
||||
// Respect setEditable(boolean):
|
||||
if (!this.isEditable())
|
||||
return;
|
||||
|
@ -90,9 +93,7 @@ public class StrictDuffPane extends DuffPane
|
|||
*/
|
||||
if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
|
||||
{
|
||||
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
|
||||
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
|
||||
toTibetanMachineWeb(data, offset);
|
||||
pasteAsString = true;
|
||||
}
|
||||
// JOptionPane.showMessageDialog(this, "You cannot paste from the application from which you copied.\nIt uses an RTF format that is too advanced for the version\nof Java Jskad is running atop.");
|
||||
}
|
||||
|
@ -105,9 +106,7 @@ public class StrictDuffPane extends DuffPane
|
|||
if (!sd.getFont((sd.getCharacterElement(0).getAttributes())).getFamily().startsWith("TibetanMachine")
|
||||
&& contents.isDataFlavorSupported(DataFlavor.stringFlavor))
|
||||
{
|
||||
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
|
||||
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
|
||||
toTibetanMachineWeb(data, offset);
|
||||
pasteAsString = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -137,10 +136,20 @@ public class StrictDuffPane extends DuffPane
|
|||
else if (contents.isDataFlavorSupported(DataFlavor.stringFlavor))
|
||||
{
|
||||
// if it is not in a font, assume it is wylie or ACIP.
|
||||
pasteAsString = true;
|
||||
}
|
||||
|
||||
if (pasteAsString)
|
||||
{
|
||||
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
|
||||
if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
|
||||
if (Manipulate.guessIfUnicode(data))
|
||||
{
|
||||
StringBuffer errors = new StringBuffer();
|
||||
data = Converter.convertToEwtsForComputers(data, errors);
|
||||
} else if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
|
||||
toTibetanMachineWeb(data, offset);
|
||||
}
|
||||
|
||||
} catch (UnsupportedFlavorException ufe) {
|
||||
ufe.printStackTrace();
|
||||
ThdlDebug.noteIffyCode();
|
||||
|
|
|
@ -20,7 +20,8 @@ Contributor(s): ______________________________________.
|
|||
to store the dictionary. */
|
||||
package org.thdl.tib.scanner;
|
||||
|
||||
import org.thdl.tib.text.TibetanHTML;
|
||||
//import org.thdl.tib.text.TibetanHTML;
|
||||
import org.thdl.tib.text.ttt.*;
|
||||
|
||||
/** Tibetan word with its corresponding definitions.
|
||||
|
||||
|
@ -60,7 +61,8 @@ public class SwingWord extends Word
|
|||
{
|
||||
try
|
||||
{
|
||||
localWord = TibetanHTML.getHTML(super.token + " ");
|
||||
// localWord = TibetanHTML.getHTML(super.token + " ");
|
||||
localWord = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(super.token + " "));
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
|
@ -82,6 +84,7 @@ public class SwingWord extends Word
|
|||
public String getLink(boolean tibetan)
|
||||
{
|
||||
String localWord, result=null;
|
||||
String className = "";
|
||||
|
||||
if (wordSinDec==null) localWord = super.token;
|
||||
else localWord = wordSinDec;
|
||||
|
@ -89,7 +92,8 @@ public class SwingWord extends Word
|
|||
{
|
||||
try
|
||||
{
|
||||
result = TibetanHTML.getHTML(localWord + " ");
|
||||
result = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(localWord + " "));
|
||||
className = " class = \"tib\"";
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
|
@ -101,6 +105,6 @@ public class SwingWord extends Word
|
|||
if (tibetan) result+= "</a>";
|
||||
else result+= "</a> ";
|
||||
return result;*/
|
||||
return "<a href=\"#" + super.token + "\">" + result + "</a> ";
|
||||
return "<a href=\"#" + super.token + "\"" + className + ">" + result + "</a> ";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.thdl.util.ThdlVersion;
|
|||
*/
|
||||
public abstract class TibetanScanner
|
||||
{
|
||||
public static final String version = "The Tibetan to English Translation Tool, version 3.2.1 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
|
||||
public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
|
||||
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
|
||||
public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved.";
|
||||
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright © 2000-2005 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andrés Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";
|
||||
|
|
Loading…
Reference in a new issue