Updated the Unicode-to-Wylie converter to convert only characters in the Tibetan Unicode range and leave all other characters unchanged.
This commit is contained in:
parent
36e222fccc
commit
ffb32b3207
1 changed files with 474 additions and 446 deletions
|
@ -1,447 +1,475 @@
|
||||||
/*
|
/*
|
||||||
The contents of this file are subject to the AMP Open Community License
|
The contents of this file are subject to the AMP Open Community License
|
||||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||||
with the License. You may obtain a copy of the License on the AMP web site
|
with the License. You may obtain a copy of the License on the AMP web site
|
||||||
(http://www.tibet.iteso.mx/Guatemala/).
|
(http://www.tibet.iteso.mx/Guatemala/).
|
||||||
|
|
||||||
Software distributed under the License is distributed on an "AS IS" basis,
|
Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||||
License for the specific terms governing rights and limitations under the
|
License for the specific terms governing rights and limitations under the
|
||||||
License.
|
License.
|
||||||
|
|
||||||
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
||||||
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
||||||
Pellegrini. All Rights Reserved.
|
Pellegrini. All Rights Reserved.
|
||||||
|
|
||||||
Contributor(s): ______________________________________.
|
Contributor(s): ______________________________________.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.thdl.tib.scanner;
|
package org.thdl.tib.scanner;
|
||||||
|
|
||||||
import org.thdl.tib.text.InvalidTransliterationException;
|
import org.thdl.tib.text.InvalidTransliterationException;
|
||||||
import org.thdl.tib.text.TibTextUtils;
|
import org.thdl.tib.text.TibTextUtils;
|
||||||
import org.thdl.tib.text.TibetanDocument;
|
import org.thdl.tib.text.TibetanDocument;
|
||||||
import org.thdl.tib.text.reverter.Converter;
|
import org.thdl.tib.text.reverter.Converter;
|
||||||
import org.thdl.tib.text.ttt.EwtsToUnicodeForXslt;
|
import org.thdl.tib.text.ttt.EwtsToUnicodeForXslt;
|
||||||
import org.thdl.tib.input.*;
|
import org.thdl.tib.input.*;
|
||||||
import org.thdl.util.*;
|
import org.thdl.util.*;
|
||||||
import java.net.*;
|
import java.net.*;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrap-up class for the various converters that the Translation Tool needs.
|
* Wrap-up class for the various converters that the Translation Tool needs.
|
||||||
* All conversions are done by static methods meant to be as straight-forward
|
* All conversions are done by static methods meant to be as straight-forward
|
||||||
* and simple as possible not caring about error or warning messages.
|
* and simple as possible not caring about error or warning messages.
|
||||||
*
|
*
|
||||||
* @author Andres Montano
|
* @author Andres Montano
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class BasicTibetanTranscriptionConverter implements FontConverterConstants
|
public class BasicTibetanTranscriptionConverter implements FontConverterConstants
|
||||||
{
|
{
|
||||||
private static BufferedReader in;
|
private static BufferedReader in;
|
||||||
private static PrintWriter out;
|
private static PrintWriter out;
|
||||||
|
|
||||||
//private static int conversionType=0;
|
//private static int conversionType=0;
|
||||||
private static final int ACIP_TO_WYLIE=1;
|
private static final int ACIP_TO_WYLIE=1;
|
||||||
private static final int WYLIE_TO_ACIP=2;
|
private static final int WYLIE_TO_ACIP=2;
|
||||||
private static final int UNICODE_TO_WYLIE=3;
|
private static final int UNICODE_TO_WYLIE=3;
|
||||||
private static final int WYLIE_TO_UNICODE=4;
|
private static final int WYLIE_TO_UNICODE=4;
|
||||||
|
private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
|
||||||
/** Converts from the Acip transliteration scheme to EWTS.*/
|
|
||||||
public static String acipToWylie(String acip)
|
/** Converts from the Acip transliteration scheme to EWTS.*/
|
||||||
{
|
public static String acipToWylie(String acip)
|
||||||
TibetanDocument tibDoc = new TibetanDocument();
|
{
|
||||||
try
|
TibetanDocument tibDoc = new TibetanDocument();
|
||||||
{
|
try
|
||||||
TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, tibDoc, 0, false);
|
{
|
||||||
}
|
TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, tibDoc, 0, false);
|
||||||
catch (InvalidTransliterationException e)
|
}
|
||||||
{
|
catch (InvalidTransliterationException e)
|
||||||
return null;
|
{
|
||||||
}
|
return null;
|
||||||
return tibDoc.getWylie(new boolean[] { false });
|
}
|
||||||
|
return tibDoc.getWylie(new boolean[] { false });
|
||||||
/* char caract[], ch, chP, chN;
|
|
||||||
String nuevaLinea;
|
/* char caract[], ch, chP, chN;
|
||||||
int i, len;
|
String nuevaLinea;
|
||||||
boolean open;
|
int i, len;
|
||||||
|
boolean open;
|
||||||
caract = acip.toCharArray();
|
|
||||||
len = acip.length();
|
caract = acip.toCharArray();
|
||||||
for (i=0; i<len; i++)
|
len = acip.length();
|
||||||
{
|
for (i=0; i<len; i++)
|
||||||
if (Character.isLowerCase(caract[i]))
|
{
|
||||||
caract[i] = Character.toUpperCase(caract[i]);
|
if (Character.isLowerCase(caract[i]))
|
||||||
else if (Character.isUpperCase(caract[i]))
|
caract[i] = Character.toUpperCase(caract[i]);
|
||||||
caract[i] = Character.toLowerCase(caract[i]);
|
else if (Character.isUpperCase(caract[i]))
|
||||||
}
|
caract[i] = Character.toLowerCase(caract[i]);
|
||||||
nuevaLinea = new String(caract);
|
}
|
||||||
|
nuevaLinea = new String(caract);
|
||||||
/* ahora hacer los cambios de Michael Roach ts -> tsh, tz -> ts, v -> w,
|
|
||||||
TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h,
|
/* ahora hacer los cambios de Michael Roach ts -> tsh, tz -> ts, v -> w,
|
||||||
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
|
TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h,
|
||||||
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
|
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
|
||||||
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
|
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
|
||||||
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */
|
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
|
||||||
|
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */
|
||||||
/* nuevaLinea = Manipulate.replace(nuevaLinea, "ts", "tq");
|
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "tz", "ts");
|
/* nuevaLinea = Manipulate.replace(nuevaLinea, "ts", "tq");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "tq", "tsh");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "tz", "ts");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "v", "w");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "tq", "tsh");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "TH", "Th");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "v", "w");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "kSH", "k+Sh");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "TH", "Th");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "kaSH", "k+Sh");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "kSH", "k+Sh");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "SH", "Sh");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "kaSH", "k+Sh");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, ":", "H");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "SH", "Sh");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "NH", "NaH");
|
nuevaLinea = Manipulate.replace(nuevaLinea, ":", "H");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "dh", "d+h");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "NH", "NaH");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "gh", "g+h");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "dh", "d+h");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "bh", "b+h");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "gh", "g+h");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "dzh", "dz+h");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "bh", "b+h");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "aa", "a");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "dzh", "dz+h");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ai", "i");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "aa", "a");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "aee", "ai");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "ai", "i");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "au", "u");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "aee", "ai");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "aoo", "au");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "au", "u");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ae", "e");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "aoo", "au");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ao", "o");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "ae", "e");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ee", "ai");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "ao", "o");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "oo", "au");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "ee", "ai");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "\'I", "\'q");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "oo", "au");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "I", "-i");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "\'I", "\'q");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "\'q", "-I");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "I", "-i");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "\\", "?");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "\'q", "-I");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "`", "!");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "\\", "?");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ga-y", "g.y");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "`", "!");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "g-y", "g.y");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "ga-y", "g.y");
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, "na-y", "n+y");
|
nuevaLinea = Manipulate.replace(nuevaLinea, "g-y", "g.y");
|
||||||
|
nuevaLinea = Manipulate.replace(nuevaLinea, "na-y", "n+y");
|
||||||
len = nuevaLinea.length();
|
|
||||||
for (i=0; i<len; i++)
|
len = nuevaLinea.length();
|
||||||
{
|
for (i=0; i<len; i++)
|
||||||
ch = nuevaLinea.charAt(i);
|
{
|
||||||
switch(ch)
|
ch = nuevaLinea.charAt(i);
|
||||||
{
|
switch(ch)
|
||||||
case '#':
|
{
|
||||||
nuevaLinea = nuevaLinea.substring(0,i) + "@##" + nuevaLinea.substring(i+1);
|
case '#':
|
||||||
i+=3;
|
nuevaLinea = nuevaLinea.substring(0,i) + "@##" + nuevaLinea.substring(i+1);
|
||||||
len+=2;
|
i+=3;
|
||||||
break;
|
len+=2;
|
||||||
case '*':
|
break;
|
||||||
nuevaLinea = nuevaLinea.substring(0,i) + "@#" + nuevaLinea.substring(i+1);
|
case '*':
|
||||||
i+=2;
|
nuevaLinea = nuevaLinea.substring(0,i) + "@#" + nuevaLinea.substring(i+1);
|
||||||
len++;
|
i+=2;
|
||||||
break;
|
len++;
|
||||||
case '\'':
|
break;
|
||||||
if (i>0 && i<len-1)
|
case '\'':
|
||||||
{
|
if (i>0 && i<len-1)
|
||||||
chP = nuevaLinea.charAt(i-1);
|
{
|
||||||
chN = nuevaLinea.charAt(i+1);
|
chP = nuevaLinea.charAt(i-1);
|
||||||
if (Manipulate.isVowel(chN))
|
chN = nuevaLinea.charAt(i+1);
|
||||||
{
|
if (Manipulate.isVowel(chN))
|
||||||
if (Character.isLetter(chP) && !Manipulate.isVowel(chP))
|
{
|
||||||
{
|
if (Character.isLetter(chP) && !Manipulate.isVowel(chP))
|
||||||
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
{
|
||||||
len--;
|
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||||
}
|
len--;
|
||||||
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
|
}
|
||||||
{
|
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
|
||||||
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
{
|
||||||
len-=2;
|
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||||
}
|
len-=2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
open = false;
|
|
||||||
for (i=0; i<len; i++)
|
open = false;
|
||||||
{
|
for (i=0; i<len; i++)
|
||||||
ch = nuevaLinea.charAt(i);
|
{
|
||||||
if (ch=='/')
|
ch = nuevaLinea.charAt(i);
|
||||||
{
|
if (ch=='/')
|
||||||
if (open)
|
{
|
||||||
{
|
if (open)
|
||||||
nuevaLinea = nuevaLinea.substring(0, i) + ")" + nuevaLinea.substring(i+1);
|
{
|
||||||
open = false;
|
nuevaLinea = nuevaLinea.substring(0, i) + ")" + nuevaLinea.substring(i+1);
|
||||||
}
|
open = false;
|
||||||
|
}
|
||||||
else
|
|
||||||
{
|
else
|
||||||
nuevaLinea = nuevaLinea.substring(0, i) + "(" + nuevaLinea.substring(i+1);
|
{
|
||||||
open = true;
|
nuevaLinea = nuevaLinea.substring(0, i) + "(" + nuevaLinea.substring(i+1);
|
||||||
}
|
open = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nuevaLinea = Manipulate.replace(nuevaLinea, ",", "/");
|
}
|
||||||
|
nuevaLinea = Manipulate.replace(nuevaLinea, ",", "/");
|
||||||
return nuevaLinea; */
|
|
||||||
}
|
return nuevaLinea; */
|
||||||
|
}
|
||||||
/** Converts from EWTS to the ACIP transliteration scheme. */
|
|
||||||
public static String wylieToAcip(String wylie)
|
/** Converts from EWTS to the ACIP transliteration scheme. */
|
||||||
{
|
public static String wylieToAcip(String wylie)
|
||||||
TibetanDocument tibDoc = new TibetanDocument();
|
{
|
||||||
try
|
TibetanDocument tibDoc = new TibetanDocument();
|
||||||
{
|
try
|
||||||
TibTextUtils.insertTibetanMachineWebForTranslit(true, wylie, tibDoc, 0, false);
|
{
|
||||||
}
|
TibTextUtils.insertTibetanMachineWebForTranslit(true, wylie, tibDoc, 0, false);
|
||||||
catch (InvalidTransliterationException e)
|
}
|
||||||
{
|
catch (InvalidTransliterationException e)
|
||||||
return null;
|
{
|
||||||
}
|
return null;
|
||||||
return tibDoc.getACIP(new boolean[] { false });
|
}
|
||||||
|
return tibDoc.getACIP(new boolean[] { false });
|
||||||
/* DLC FIXME: for unknown things, return null. */
|
|
||||||
/* if (wylie.equals("@##")) return "#";
|
/* DLC FIXME: for unknown things, return null. */
|
||||||
if (wylie.equals("@#")) return "*";
|
/* if (wylie.equals("@##")) return "#";
|
||||||
if (wylie.equals("!")) return "`";
|
if (wylie.equals("@#")) return "*";
|
||||||
if (wylie.equals("b+h")) return "BH";
|
if (wylie.equals("!")) return "`";
|
||||||
if (wylie.equals("d+h")) return "DH";
|
if (wylie.equals("b+h")) return "BH";
|
||||||
if (wylie.equals("X")) return null;
|
if (wylie.equals("d+h")) return "DH";
|
||||||
if (wylie.equals("iA")) return null;
|
if (wylie.equals("X")) return null;
|
||||||
if (wylie.equals("ai")) return "EE";
|
if (wylie.equals("iA")) return null;
|
||||||
if (wylie.equals("au")) return "OO";
|
if (wylie.equals("ai")) return "EE";
|
||||||
if (wylie.equals("$")) return null;
|
if (wylie.equals("au")) return "OO";
|
||||||
if (wylie.startsWith("@") || wylie.startsWith("#"))
|
if (wylie.equals("$")) return null;
|
||||||
return null; // we can't convert this in isolation! We need context.
|
if (wylie.startsWith("@") || wylie.startsWith("#"))
|
||||||
char []caract;
|
return null; // we can't convert this in isolation! We need context.
|
||||||
int i, j, len;
|
char []caract;
|
||||||
String nuevaPalabra;
|
int i, j, len;
|
||||||
|
String nuevaPalabra;
|
||||||
caract = wylie.toCharArray();
|
|
||||||
len = wylie.length();
|
caract = wylie.toCharArray();
|
||||||
for (j=0; j<len; j++)
|
len = wylie.length();
|
||||||
{
|
for (j=0; j<len; j++)
|
||||||
i = j;
|
{
|
||||||
//ciclo:
|
i = j;
|
||||||
while(true) // para manejar excepciones; que honda!
|
//ciclo:
|
||||||
{
|
while(true) // para manejar excepciones; que honda!
|
||||||
switch(caract[i])
|
{
|
||||||
{
|
switch(caract[i])
|
||||||
case 'A':
|
{
|
||||||
if (i>0)
|
case 'A':
|
||||||
{
|
if (i>0)
|
||||||
i--;
|
{
|
||||||
break;
|
i--;
|
||||||
}
|
break;
|
||||||
default:
|
}
|
||||||
if (Character.isLowerCase(caract[i]))
|
default:
|
||||||
caract[i] = Character.toUpperCase(caract[i]);
|
if (Character.isLowerCase(caract[i]))
|
||||||
else if (Character.isUpperCase(caract[i]))
|
caract[i] = Character.toUpperCase(caract[i]);
|
||||||
caract[i] = Character.toLowerCase(caract[i]);
|
else if (Character.isUpperCase(caract[i]))
|
||||||
// break ciclo;
|
caract[i] = Character.toLowerCase(caract[i]);
|
||||||
}
|
// break ciclo;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nuevaPalabra = new String(caract);
|
}
|
||||||
// nuevaPalabra = palabra.toUpperCase();
|
nuevaPalabra = new String(caract);
|
||||||
|
// nuevaPalabra = palabra.toUpperCase();
|
||||||
// ahora hacer los cambios de Michael Roach
|
|
||||||
|
// ahora hacer los cambios de Michael Roach
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TSH", "TQQ");
|
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TS", "TZ");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TSH", "TQQ");
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TQQ", "TS");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TS", "TZ");
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "a", "'A");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TQQ", "TS");
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "i", "'I");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "a", "'A");
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "u", "'U");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "i", "'I");
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "-I", "i");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "u", "'U");
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "/", ",");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "-I", "i");
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "_", " ");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "/", ",");
|
||||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "|", ";");
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "_", " ");
|
||||||
nuevaPalabra = Manipulate.fixWazur(nuevaPalabra);
|
nuevaPalabra = Manipulate.replace(nuevaPalabra, "|", ";");
|
||||||
return nuevaPalabra;*/
|
nuevaPalabra = Manipulate.fixWazur(nuevaPalabra);
|
||||||
}
|
return nuevaPalabra;*/
|
||||||
|
}
|
||||||
/** Converts Tibetan Unicode to EWTS. */
|
|
||||||
public static String unicodeToWylie(String unicode)
|
private static int getTibetanUnicodeStart(String unicode, int pos)
|
||||||
{
|
{
|
||||||
String machineWylie;
|
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)>=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos;
|
||||||
TibetanDocument tibDoc = new TibetanDocument();
|
return -1;
|
||||||
StringBuffer errors = new StringBuffer();
|
}
|
||||||
|
|
||||||
machineWylie = Converter.convertToEwtsForComputers(unicode, errors);
|
private static int getTibetanUnicodeEnd(String unicode, int pos)
|
||||||
try
|
{
|
||||||
{
|
for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)<TIBETAN_UNICODE_RANGE[0] || unicode.codePointAt(pos)>TIBETAN_UNICODE_RANGE[1]) return pos;
|
||||||
TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false);
|
return pos;
|
||||||
}
|
}
|
||||||
catch (InvalidTransliterationException e)
|
|
||||||
{
|
/** Converts Tibetan Unicode to EWTS. */
|
||||||
return null;
|
public static String unicodeToWylie(String unicode)
|
||||||
}
|
{
|
||||||
return tibDoc.getWylie(new boolean[] { false });
|
|
||||||
}
|
String machineWylie, startString, tibetanString, endString;
|
||||||
|
TibetanDocument tibDoc;
|
||||||
/** Converts EWTS to Tibetan Unicode. */
|
StringBuffer errors;
|
||||||
public static String wylieToUnicode(String wylie)
|
int posStart=0, posEnd;
|
||||||
{
|
while((posStart = getTibetanUnicodeStart(unicode, posStart))>=0)
|
||||||
return EwtsToUnicodeForXslt.convertEwtsTo(wylie);
|
{
|
||||||
}
|
posEnd = getTibetanUnicodeEnd(unicode, posStart+1);
|
||||||
|
startString = unicode.substring(0, posStart);
|
||||||
/** Converts EWTS to Tibetan Unicode represented in NCR. */
|
tibetanString = unicode.substring(posStart, posEnd);
|
||||||
public static String wylieToHTMLUnicode(String wylie)
|
endString = unicode.substring(posEnd);
|
||||||
{
|
|
||||||
return Manipulate.UnicodeString2NCR(wylieToUnicode(wylie));
|
tibDoc = new TibetanDocument();
|
||||||
}
|
errors = new StringBuffer();
|
||||||
|
machineWylie = Converter.convertToEwtsForComputers(tibetanString, errors);
|
||||||
/** Converts Tibetan Unicode represented in NCR to EWTS. */
|
try
|
||||||
public static String HTMLUnicodeToWylie(String unicode)
|
{
|
||||||
{
|
TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false);
|
||||||
return unicodeToWylie(Manipulate.NCR2UnicodeString(unicode));
|
}
|
||||||
}
|
catch (InvalidTransliterationException e)
|
||||||
|
{
|
||||||
public static void printSyntax()
|
return null;
|
||||||
{
|
}
|
||||||
System.out.println("Syntax: BasicTibetanTranscriptionConverter [-format format-of-files | [-fi format-of-input-file] [-fo format-of-output-file]] [-it acip | wylie | UTF16] [-ot acip | wylie | UTF16] input-file [output-file]");
|
unicode = startString + tibDoc.getWylie(new boolean[] { false }) + endString;
|
||||||
}
|
}
|
||||||
|
return unicode;
|
||||||
public BasicTibetanTranscriptionConverter(BufferedReader in, PrintWriter out)
|
}
|
||||||
{
|
|
||||||
BasicTibetanTranscriptionConverter.in = in;
|
/** Converts EWTS to Tibetan Unicode. */
|
||||||
BasicTibetanTranscriptionConverter.out = out;
|
public static String wylieToUnicode(String wylie)
|
||||||
}
|
{
|
||||||
|
return EwtsToUnicodeForXslt.convertEwtsTo(wylie);
|
||||||
|
}
|
||||||
public static void main (String[] args) throws Exception
|
|
||||||
{
|
/** Converts EWTS to Tibetan Unicode represented in NCR. */
|
||||||
PrintWriter out;
|
public static String wylieToHTMLUnicode(String wylie)
|
||||||
BufferedReader in=null;
|
{
|
||||||
int argNum = args.length, currentArg=0;
|
return Manipulate.UnicodeString2NCR(wylieToUnicode(wylie));
|
||||||
String option;
|
}
|
||||||
String formatIn = null, formatOut = null, inputTransSyst="wylie", outputTransSyst="wylie";
|
|
||||||
boolean file = false;
|
/** Converts Tibetan Unicode represented in NCR to EWTS. */
|
||||||
int conversionType=0;
|
public static String HTMLUnicodeToWylie(String unicode)
|
||||||
|
{
|
||||||
if (argNum<=currentArg)
|
return unicodeToWylie(Manipulate.NCR2UnicodeString(unicode));
|
||||||
{
|
}
|
||||||
printSyntax();
|
|
||||||
return;
|
public static void printSyntax()
|
||||||
}
|
{
|
||||||
|
System.out.println("Syntax: BasicTibetanTranscriptionConverter [-format format-of-files | [-fi format-of-input-file] [-fo format-of-output-file]] [-it acip | wylie | UTF16] [-ot acip | wylie | UTF16] input-file [output-file]");
|
||||||
while (args[currentArg].charAt(0)=='-')
|
}
|
||||||
{
|
|
||||||
option = args[currentArg++].substring(1);
|
public BasicTibetanTranscriptionConverter(BufferedReader in, PrintWriter out)
|
||||||
if (option.equals("format"))
|
{
|
||||||
{
|
BasicTibetanTranscriptionConverter.in = in;
|
||||||
formatIn = formatOut = args[currentArg];
|
BasicTibetanTranscriptionConverter.out = out;
|
||||||
} else if (option.equals("fi"))
|
}
|
||||||
{
|
|
||||||
formatIn = args[currentArg];
|
|
||||||
} else if (option.equals("fo"))
|
public static void main (String[] args) throws Exception
|
||||||
{
|
{
|
||||||
formatOut = args[currentArg];
|
PrintWriter out;
|
||||||
} else if (option.equals("it"))
|
BufferedReader in=null;
|
||||||
{
|
int argNum = args.length, currentArg=0;
|
||||||
inputTransSyst = args[currentArg];
|
String option;
|
||||||
} else if (option.equals("ot"))
|
String formatIn = null, formatOut = null, inputTransSyst="wylie", outputTransSyst="wylie";
|
||||||
{
|
boolean file = false;
|
||||||
outputTransSyst = args[currentArg];
|
int conversionType=0;
|
||||||
}
|
|
||||||
currentArg++;
|
if (argNum<=currentArg)
|
||||||
}
|
{
|
||||||
|
printSyntax();
|
||||||
if (!inputTransSyst.equals(outputTransSyst))
|
return;
|
||||||
{
|
}
|
||||||
if (inputTransSyst.equals("wylie"))
|
|
||||||
{
|
while (args[currentArg].charAt(0)=='-')
|
||||||
if (outputTransSyst.equals("acip")) conversionType = WYLIE_TO_ACIP;
|
{
|
||||||
else conversionType = WYLIE_TO_UNICODE;
|
option = args[currentArg++].substring(1);
|
||||||
}
|
if (option.equals("format"))
|
||||||
else if (inputTransSyst.equals("acip")) conversionType = ACIP_TO_WYLIE;
|
{
|
||||||
else conversionType = UNICODE_TO_WYLIE;
|
formatIn = formatOut = args[currentArg];
|
||||||
}
|
} else if (option.equals("fi"))
|
||||||
|
{
|
||||||
switch (args.length-currentArg)
|
formatIn = args[currentArg];
|
||||||
{
|
} else if (option.equals("fo"))
|
||||||
case 0:
|
{
|
||||||
if (formatIn != null)
|
formatOut = args[currentArg];
|
||||||
{
|
} else if (option.equals("it"))
|
||||||
System.out.println("Syntax error: input file name expected.");
|
{
|
||||||
return;
|
inputTransSyst = args[currentArg];
|
||||||
}
|
} else if (option.equals("ot"))
|
||||||
out = new PrintWriter(System.out);
|
{
|
||||||
in = new BufferedReader(new InputStreamReader(System.in));
|
outputTransSyst = args[currentArg];
|
||||||
break;
|
}
|
||||||
case 1:
|
currentArg++;
|
||||||
if (formatOut != null)
|
}
|
||||||
{
|
|
||||||
System.out.println("Syntax error: output file name expected.");
|
if (!inputTransSyst.equals(outputTransSyst))
|
||||||
return;
|
{
|
||||||
}
|
if (inputTransSyst.equals("wylie"))
|
||||||
out = new PrintWriter(System.out);
|
{
|
||||||
file = true;
|
if (outputTransSyst.equals("acip")) conversionType = WYLIE_TO_ACIP;
|
||||||
break;
|
else conversionType = WYLIE_TO_UNICODE;
|
||||||
default:
|
}
|
||||||
if (formatOut != null)
|
else if (inputTransSyst.equals("acip")) conversionType = ACIP_TO_WYLIE;
|
||||||
out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(args[currentArg + 1]), formatOut));
|
else conversionType = UNICODE_TO_WYLIE;
|
||||||
else
|
}
|
||||||
out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(args[currentArg + 1])));
|
|
||||||
file = true;
|
switch (args.length-currentArg)
|
||||||
}
|
{
|
||||||
if (file)
|
case 0:
|
||||||
{
|
if (formatIn != null)
|
||||||
in = getBufferedReader (args[currentArg], formatIn);
|
{
|
||||||
}
|
System.out.println("Syntax error: input file name expected.");
|
||||||
|
return;
|
||||||
new BasicTibetanTranscriptionConverter(in, out).run(conversionType);
|
}
|
||||||
}
|
out = new PrintWriter(System.out);
|
||||||
|
in = new BufferedReader(new InputStreamReader(System.in));
|
||||||
/**
|
break;
|
||||||
* This method was added for compatibility's sake with the FontConverterConstants interface.
|
case 1:
|
||||||
*
|
if (formatOut != null)
|
||||||
* @param conversionType
|
{
|
||||||
* @throws IOException
|
System.out.println("Syntax error: output file name expected.");
|
||||||
*/
|
return;
|
||||||
public void run(String conversionType) throws IOException
|
}
|
||||||
{
|
out = new PrintWriter(System.out);
|
||||||
int conversionTypeInt=0;
|
file = true;
|
||||||
if (conversionType==ACIP_TO_WYLIE_TEXT) conversionTypeInt = ACIP_TO_WYLIE;
|
break;
|
||||||
if (conversionType==WYLIE_TO_ACIP_TEXT) conversionTypeInt = WYLIE_TO_ACIP;
|
default:
|
||||||
if (conversionType==UNI_TO_WYLIE_TEXT) conversionTypeInt = UNICODE_TO_WYLIE;
|
if (formatOut != null)
|
||||||
if (conversionType==WYLIE_TO_UNI_TEXT) conversionTypeInt = WYLIE_TO_UNICODE;
|
out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(args[currentArg + 1]), formatOut));
|
||||||
run(conversionTypeInt);
|
else
|
||||||
}
|
out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(args[currentArg + 1])));
|
||||||
|
file = true;
|
||||||
public void run(int conversionType) throws IOException
|
}
|
||||||
{
|
if (file)
|
||||||
String linea, result;
|
{
|
||||||
|
in = getBufferedReader (args[currentArg], formatIn);
|
||||||
while ((linea=in.readLine())!=null)
|
}
|
||||||
{
|
|
||||||
switch(conversionType)
|
new BasicTibetanTranscriptionConverter(in, out).run(conversionType);
|
||||||
{
|
}
|
||||||
case ACIP_TO_WYLIE:
|
|
||||||
result = acipToWylie(linea);
|
/**
|
||||||
break;
|
* This method was added for compatibility's sake with the FontConverterConstants interface.
|
||||||
case WYLIE_TO_ACIP:
|
*
|
||||||
result = wylieToAcip(linea);
|
* @param conversionType
|
||||||
break;
|
* @throws IOException
|
||||||
case UNICODE_TO_WYLIE:
|
*/
|
||||||
result = unicodeToWylie(linea);
|
public void run(String conversionType) throws IOException
|
||||||
break;
|
{
|
||||||
case WYLIE_TO_UNICODE:
|
int conversionTypeInt=0;
|
||||||
result = wylieToUnicode(linea);
|
if (conversionType==ACIP_TO_WYLIE_TEXT) conversionTypeInt = ACIP_TO_WYLIE;
|
||||||
break;
|
if (conversionType==WYLIE_TO_ACIP_TEXT) conversionTypeInt = WYLIE_TO_ACIP;
|
||||||
default: result = linea;
|
if (conversionType==UNI_TO_WYLIE_TEXT) conversionTypeInt = UNICODE_TO_WYLIE;
|
||||||
}
|
if (conversionType==WYLIE_TO_UNI_TEXT) conversionTypeInt = WYLIE_TO_UNICODE;
|
||||||
if (result!=null) out.println(result);
|
run(conversionTypeInt);
|
||||||
}
|
}
|
||||||
out.flush();
|
|
||||||
}
|
public void run(int conversionType) throws IOException
|
||||||
|
{
|
||||||
public static BufferedReader getBufferedReader(String s, String format) throws Exception
|
String linea, result;
|
||||||
{
|
|
||||||
InputStream is;
|
while ((linea=in.readLine())!=null)
|
||||||
|
{
|
||||||
if (s.indexOf("http://") >= 0)
|
switch(conversionType)
|
||||||
is = new BufferedInputStream((new URL(s)).openStream());
|
{
|
||||||
else
|
case ACIP_TO_WYLIE:
|
||||||
is = new FileInputStream(s);
|
result = acipToWylie(linea);
|
||||||
|
break;
|
||||||
if (format==null)
|
case WYLIE_TO_ACIP:
|
||||||
return new BufferedReader(new InputStreamReader(is));
|
result = wylieToAcip(linea);
|
||||||
else
|
break;
|
||||||
return new BufferedReader(new InputStreamReader(is, format));
|
case UNICODE_TO_WYLIE:
|
||||||
|
result = unicodeToWylie(linea);
|
||||||
}
|
break;
|
||||||
|
case WYLIE_TO_UNICODE:
|
||||||
|
result = wylieToUnicode(linea);
|
||||||
|
break;
|
||||||
|
default: result = linea;
|
||||||
|
}
|
||||||
|
if (result!=null)
|
||||||
|
{
|
||||||
|
out.println(result);
|
||||||
|
out.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static BufferedReader getBufferedReader(String s, String format) throws Exception
|
||||||
|
{
|
||||||
|
InputStream is;
|
||||||
|
|
||||||
|
if (s.indexOf("http://") >= 0)
|
||||||
|
is = new BufferedInputStream((new URL(s)).openStream());
|
||||||
|
else
|
||||||
|
is = new FileInputStream(s);
|
||||||
|
|
||||||
|
if (format==null)
|
||||||
|
return new BufferedReader(new InputStreamReader(is));
|
||||||
|
else
|
||||||
|
return new BufferedReader(new InputStreamReader(is, format));
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in a new issue