The org.thdl.tib.scanner.Manipulate class was originally meant as a grab-bag of methods for processing strings representing Tibetan Wylie. It doesn't make sense to keep the wrapper methods for converting between the various transcription schemes there, so they were moved to BasicTibetanTranscriptionConverter, and all classes that reference them were updated.

This commit is contained in:
amontano 2006-04-24 19:19:04 +00:00
parent 67bddb7a7e
commit 0c891ec96c
8 changed files with 290 additions and 243 deletions

View file

@ -99,7 +99,7 @@ public class AcipToWylie
String linea;
while ((linea=in.readLine())!=null)
{
out.println(Manipulate.acipToWylie(linea));
out.println(BasicTibetanTranscriptionConverter.acipToWylie(linea));
}
out.flush();
}

View file

@ -0,0 +1,278 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import org.thdl.tib.text.InvalidTransliterationException;
import org.thdl.tib.text.TibTextUtils;
import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.reverter.Converter;
import org.thdl.tib.text.ttt.EwtsToUnicodeForXslt;
/**
 * Wrapper class for the various converters that the Translation Tool needs.
 * All conversions are done by static methods meant to be as straightforward
 * and simple as possible: no error or warning messages are reported to the
 * caller. The methods that go through a {@link TibetanDocument} return
 * <code>null</code> when the input is rejected with an
 * {@link InvalidTransliterationException}.
 *
 * @author Andres Montano
 *
 */
public class BasicTibetanTranscriptionConverter {

    /**
     * Loads a transliterated string into a new, empty document.
     *
     * @param firstFlag value handed unchanged to
     *        <code>TibTextUtils.insertTibetanMachineWebForTranslit</code> as its
     *        first argument; the callers in this class pass <code>true</code>
     *        for EWTS input and <code>false</code> for ACIP input.
     * @param translit the transliterated text to load.
     * @return the populated document, or <code>null</code> if the text was
     *         rejected with an InvalidTransliterationException.
     */
    private static TibetanDocument loadTransliteration(boolean firstFlag, String translit)
    {
        TibetanDocument tibDoc = new TibetanDocument();
        try
        {
            TibTextUtils.insertTibetanMachineWebForTranslit(firstFlag, translit, tibDoc, 0, false);
        }
        catch (InvalidTransliterationException ignored)
        {
            // Deliberately not reported: this class promises simple
            // conversions without error or warning messages, so failure
            // is signalled to the caller only through a null result.
            return null;
        }
        return tibDoc;
    }

    /**
     * Converts from the ACIP transliteration scheme to EWTS.
     *
     * @param acip text in ACIP transliteration.
     * @return the EWTS equivalent, or <code>null</code> if the input could
     *         not be parsed.
     */
    public static String acipToWylie(String acip)
    {
        TibetanDocument tibDoc = loadTransliteration(false, acip);
        if (tibDoc == null)
        {
            return null;
        }
        return tibDoc.getWylie(new boolean[] { false });
    }

    /**
     * Converts from EWTS to the ACIP transliteration scheme.
     *
     * @param wylie text in EWTS.
     * @return the ACIP equivalent, or <code>null</code> if the input could
     *         not be parsed.
     */
    public static String wylieToAcip(String wylie)
    {
        // The input is EWTS, so the first argument is true, exactly as in
        // unicodeToWylie(), which also feeds EWTS to the same call; only
        // acipToWylie(), whose input is ACIP, passes false. This method
        // previously passed false, i.e. it handed EWTS text to the call
        // configured the way acipToWylie() configures it for ACIP.
        // NOTE(review): the meaning of the flag is inferred from those two
        // call sites -- confirm against TibTextUtils before release.
        TibetanDocument tibDoc = loadTransliteration(true, wylie);
        if (tibDoc == null)
        {
            return null;
        }
        return tibDoc.getACIP(new boolean[] { false });
    }

    /**
     * Converts Tibetan Unicode to EWTS.
     *
     * @param unicode text in Tibetan Unicode.
     * @return the EWTS equivalent, or <code>null</code> if the intermediate
     *         transliteration could not be parsed.
     */
    public static String unicodeToWylie(String unicode)
    {
        // The converter requires a buffer for its error messages; they are
        // intentionally discarded here.
        StringBuffer errors = new StringBuffer();
        String machineWylie = Converter.convertToEwtsForComputers(unicode, errors);

        // Round-trip the machine-oriented EWTS through a document to obtain
        // the Wylie string that the document itself produces.
        TibetanDocument tibDoc = loadTransliteration(true, machineWylie);
        if (tibDoc == null)
        {
            return null;
        }
        return tibDoc.getWylie(new boolean[] { false });
    }

    /**
     * Converts EWTS to Tibetan Unicode.
     *
     * @param wylie text in EWTS.
     * @return the result of <code>EwtsToUnicodeForXslt.convertEwtsTo</code>.
     */
    public static String wylieToUnicode(String wylie)
    {
        return EwtsToUnicodeForXslt.convertEwtsTo(wylie);
    }

    /**
     * Converts EWTS to Tibetan Unicode represented in NCR.
     *
     * @param wylie text in EWTS.
     * @return the Unicode conversion of the input, passed through
     *         <code>Manipulate.UnicodeString2NCR</code>.
     */
    public static String wylieToHTMLUnicode(String wylie)
    {
        return Manipulate.UnicodeString2NCR(wylieToUnicode(wylie));
    }

    /**
     * Converts Tibetan Unicode represented in NCR to EWTS.
     *
     * @param unicode Tibetan Unicode text represented in NCR.
     * @return the EWTS equivalent, or <code>null</code> under the same
     *         conditions as {@link #unicodeToWylie(String)}.
     */
    public static String HTMLUnicodeToWylie(String unicode)
    {
        return unicodeToWylie(Manipulate.NCR2UnicodeString(unicode));
    }
}

View file

@ -17,10 +17,6 @@ Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import org.thdl.tib.text.*;
import org.thdl.tib.text.reverter.*;
/** Miscellaneous static methods for the manipulation of Tibetan text.
@author Andr&eacute;s Montano Pellegrini
@ -376,228 +372,6 @@ public class Manipulate
if (psPalabras!=null) psPalabras.flush();
}*/
public static String acipToWylie(String acip)
{
TibetanDocument tibDoc = new TibetanDocument();
try
{
TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, tibDoc, 0, false);
}
catch (InvalidTransliterationException e)
{
return null;
}
return tibDoc.getWylie(new boolean[] { false });
/* char caract[], ch, chP, chN;
String nuevaLinea;
int i, len;
boolean open;
caract = acip.toCharArray();
len = acip.length();
for (i=0; i<len; i++)
{
if (Character.isLowerCase(caract[i]))
caract[i] = Character.toUpperCase(caract[i]);
else if (Character.isUpperCase(caract[i]))
caract[i] = Character.toLowerCase(caract[i]);
}
nuevaLinea = new String(caract);
/* ahora hacer los cambios de Michael Roach ts -> tsh, tz -> ts, v -> w,
TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h,
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y
nuevaLinea = replace(nuevaLinea, "ts", "tq");
nuevaLinea = replace(nuevaLinea, "tz", "ts");
nuevaLinea = replace(nuevaLinea, "tq", "tsh");
nuevaLinea = replace(nuevaLinea, "v", "w");
nuevaLinea = replace(nuevaLinea, "TH", "Th");
nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
nuevaLinea = replace(nuevaLinea, ":", "H");
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
nuevaLinea = replace(nuevaLinea, "dzh", "dz+h");
nuevaLinea = replace(nuevaLinea, "aa", "a");
nuevaLinea = replace(nuevaLinea, "ai", "i");
nuevaLinea = replace(nuevaLinea, "aee", "ai");
nuevaLinea = replace(nuevaLinea, "au", "u");
nuevaLinea = replace(nuevaLinea, "aoo", "au");
nuevaLinea = replace(nuevaLinea, "ae", "e");
nuevaLinea = replace(nuevaLinea, "ao", "o");
nuevaLinea = replace(nuevaLinea, "ee", "ai");
nuevaLinea = replace(nuevaLinea, "oo", "au");
nuevaLinea = replace(nuevaLinea, "\'I", "\'q");
nuevaLinea = replace(nuevaLinea, "I", "-i");
nuevaLinea = replace(nuevaLinea, "\'q", "-I");
nuevaLinea = replace(nuevaLinea, "\\", "?");
nuevaLinea = replace(nuevaLinea, "`", "!");
nuevaLinea = replace(nuevaLinea, "ga-y", "g.y");
nuevaLinea = replace(nuevaLinea, "g-y", "g.y");
nuevaLinea = replace(nuevaLinea, "na-y", "n+y");
len = nuevaLinea.length();
for (i=0; i<len; i++)
{
ch = nuevaLinea.charAt(i);
switch(ch)
{
case '#':
nuevaLinea = nuevaLinea.substring(0,i) + "@##" + nuevaLinea.substring(i+1);
i+=3;
len+=2;
break;
case '*':
nuevaLinea = nuevaLinea.substring(0,i) + "@#" + nuevaLinea.substring(i+1);
i+=2;
len++;
break;
case '\'':
if (i>0 && i<len-1)
{
chP = nuevaLinea.charAt(i-1);
chN = nuevaLinea.charAt(i+1);
if (isVowel(chN))
{
if (Character.isLetter(chP) && !isVowel(chP))
{
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len--;
}
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
{
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len-=2;
}
}
}
}
}
open = false;
for (i=0; i<len; i++)
{
ch = nuevaLinea.charAt(i);
if (ch=='/')
{
if (open)
{
nuevaLinea = nuevaLinea.substring(0, i) + ")" + nuevaLinea.substring(i+1);
open = false;
}
else
{
nuevaLinea = nuevaLinea.substring(0, i) + "(" + nuevaLinea.substring(i+1);
open = true;
}
}
}
nuevaLinea = replace(nuevaLinea, ",", "/");
return nuevaLinea; */
}
public static String wylieToAcip(String wylie)
{
TibetanDocument tibDoc = new TibetanDocument();
try
{
TibTextUtils.insertTibetanMachineWebForTranslit(false, wylie, tibDoc, 0, false);
}
catch (InvalidTransliterationException e)
{
return null;
}
return tibDoc.getACIP(new boolean[] { false });
/* DLC FIXME: for unknown things, return null.
if (wylie.equals("@##")) return "#";
if (wylie.equals("@#")) return "*";
if (wylie.equals("!")) return "`";
if (wylie.equals("b+h")) return "BH";
if (wylie.equals("d+h")) return "DH";
if (wylie.equals("X")) return null;
if (wylie.equals("iA")) return null;
if (wylie.equals("ai")) return "EE";
if (wylie.equals("au")) return "OO";
if (wylie.equals("$")) return null;
if (wylie.startsWith("@") || wylie.startsWith("#"))
return null; // we can't convert this in isolation! We need context.
char []caract;
int i, j, len;
String nuevaPalabra;
caract = wylie.toCharArray();
len = wylie.length();
for (j=0; j<len; j++)
{
i = j;
/*ciclo:
while(true) // para manejar excepciones; que honda!
{
switch(caract[i])
{
case 'A':
if (i>0)
{
i--;
break;
}
default:
if (Character.isLowerCase(caract[i]))
caract[i] = Character.toUpperCase(caract[i]);
else if (Character.isUpperCase(caract[i]))
caract[i] = Character.toLowerCase(caract[i]);
/* break ciclo;
}
}
}
nuevaPalabra = new String(caract);
// nuevaPalabra = palabra.toUpperCase();
// ahora hacer los cambios de Michael Roach
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
nuevaPalabra = replace(nuevaPalabra, "/", ",");
nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = replace(nuevaPalabra, "|", ";");
nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra; */
}
public static String unicodeToWylie(String unicode)
{
String machineWylie;
TibetanDocument tibDoc = new TibetanDocument();
StringBuffer errors = new StringBuffer();
machineWylie = Converter.convertToEwtsForComputers(unicode, errors);
try
{
TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false);
}
catch (InvalidTransliterationException e)
{
return null;
}
return tibDoc.getWylie(new boolean[] { false });
}
/** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */
public static String NCR2UnicodeString(String str)
{

View file

@ -317,8 +317,8 @@ public class OnLineScannerFilter extends HttpServlet
} */
scanner.clearTokens();
in = Manipulate.NCR2UnicodeString(in);
if (Manipulate.guessIfUnicode(in)) in = Manipulate.unicodeToWylie(in);
else if (Manipulate.guessIfAcip(in)) in = Manipulate.acipToWylie(in);
if (Manipulate.guessIfUnicode(in)) in = BasicTibetanTranscriptionConverter.unicodeToWylie(in);
else if (Manipulate.guessIfAcip(in)) in = BasicTibetanTranscriptionConverter.acipToWylie(in);
scanner.scanBody(in);
scanner.finishUp();
printText(pw, tibetan);

View file

@ -142,11 +142,8 @@ public class StrictDuffPane extends DuffPane
if (pasteAsString)
{
String data = (String)contents.getTransferData(DataFlavor.stringFlavor);
if (Manipulate.guessIfUnicode(data))
{
StringBuffer errors = new StringBuffer();
data = Converter.convertToEwtsForComputers(data, errors);
} else if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data);
if (Manipulate.guessIfUnicode(data)) data = BasicTibetanTranscriptionConverter.unicodeToWylie(data);
else if (Manipulate.guessIfAcip(data)) data = BasicTibetanTranscriptionConverter.acipToWylie(data);
toTibetanMachineWeb(data, offset);
}

View file

@ -20,9 +20,6 @@ Contributor(s): ______________________________________.
to store the dictionary. */
package org.thdl.tib.scanner;
//import org.thdl.tib.text.TibetanHTML;
import org.thdl.tib.text.ttt.*;
/** Tibetan word with its corresponding definitions.
@author Andr&eacute;s Montano Pellegrini
@ -62,7 +59,7 @@ public class SwingWord extends Word
try
{
// localWord = TibetanHTML.getHTML(super.token + " ");
localWord = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(super.token + " "));
localWord = BasicTibetanTranscriptionConverter.wylieToHTMLUnicode(super.token + " ");
}
catch (Exception e)
{
@ -92,7 +89,7 @@ public class SwingWord extends Word
{
try
{
result = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(localWord + " "));
result = BasicTibetanTranscriptionConverter.wylieToHTMLUnicode(localWord + " ");
className = " class = \"tib\"";
}
catch (Exception e)

View file

@ -28,9 +28,9 @@ import org.thdl.util.ThdlVersion;
public abstract class TibetanScanner
{
public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2005 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";
/** Copyright notice with the copyright sign and the accented e written as Unicode escapes. */
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2006 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2006 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2006 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";
public static final int NORMAL_MODE=1;
public static final int DEBUG_MODE=2;

View file

@ -39,6 +39,7 @@ import org.thdl.tib.text.tshegbar.UnicodeUtils;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
import org.thdl.util.Trie;
import org.thdl.tib.scanner.BasicTibetanTranscriptionConverter;
/**
* Interfaces between Extended Wylie and the TibetanMachineWeb fonts.
@ -2016,7 +2017,7 @@ private static String acipForGlyph(String hashKey) {
else
// else we are not able to use it because it's not smart
// about stacks (e.g., W+W)
return org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey);
return BasicTibetanTranscriptionConverter.wylieToAcip(hashKey);
}
/** Error that appears in a document when some TMW cannot be