Updated installers to include unicode to wylie and acip to wylie.
This commit is contained in:
parent
c5c448f0c6
commit
d6d2eeb004
4 changed files with 103 additions and 72 deletions
|
@ -238,12 +238,15 @@ class ConvertDialog extends JDialog
|
|||
|| cmd.equals(BROWSENEW))
|
||||
{
|
||||
JButton src = (JButton)ae.getSource();
|
||||
String choice = (String)choices.getSelectedItem();
|
||||
if (src == browseOld) {
|
||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| WYLIE_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| UNI_TO_WYLIE_TEXT.equals((String)choices.getSelectedItem())
|
||||
|| ACIP_TO_TMW.equals((String)choices.getSelectedItem())
|
||||
|| WYLIE_TO_TMW.equals((String)choices.getSelectedItem()))
|
||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals(choice)
|
||||
|| WYLIE_TO_UNI_TEXT.equals(choice)
|
||||
|| UNI_TO_WYLIE_TEXT.equals(choice)
|
||||
|| ACIP_TO_TMW.equals(choice)
|
||||
|| WYLIE_TO_TMW.equals(choice)
|
||||
|| ACIP_TO_WYLIE_TEXT.equals(choice)
|
||||
|| WYLIE_TO_ACIP_TEXT.equals(choice))
|
||||
? textFileFilter : rtfFileFilter);
|
||||
} else {
|
||||
jfc.setFileFilter((ACIP_TO_UNI_TEXT.equals((String)choices.getSelectedItem())
|
||||
|
@ -500,12 +503,12 @@ class ConvertDialog extends JDialog
|
|||
newFileNamePrefix = suggested_WYLIE_prefix;
|
||||
if (UNI_TO_WYLIE_TEXT == ct)
|
||||
newFileNameExtension = ".TXT";
|
||||
} else if (TMW_TO_WYLIE_TEXT == ct) {
|
||||
} else if (TMW_TO_WYLIE_TEXT == ct || ACIP_TO_WYLIE_TEXT == ct) {
|
||||
newFileNamePrefix = suggested_WYLIE_prefix;
|
||||
newFileNameExtension = ".TXT";
|
||||
} else if (TMW_TO_ACIP == ct) {
|
||||
newFileNamePrefix = suggested_ACIP_prefix;
|
||||
} else if (TMW_TO_ACIP_TEXT == ct) {
|
||||
} else if (TMW_TO_ACIP_TEXT == ct || WYLIE_TO_ACIP_TEXT == ct) {
|
||||
newFileNamePrefix = suggested_ACIP_prefix;
|
||||
newFileNameExtension = ".TXT";
|
||||
} else if (TMW_TO_UNI == ct || ACIP_TO_UNI_TEXT == ct
|
||||
|
|
|
@ -22,13 +22,15 @@ package org.thdl.tib.input;
|
|||
/** Constants used by ConvertDialog.
|
||||
|
||||
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
|
||||
interface FontConverterConstants
|
||||
public interface FontConverterConstants
|
||||
{
|
||||
final String UNI_TO_WYLIE_TEXT = "Unicode to Wylie (UTF-8 Text->Text)";
|
||||
final String UNI_TO_WYLIE_TEXT = "Unicode to Wylie (UTF-16 Text->Text)";
|
||||
final String WYLIE_TO_UNI_TEXT = "Wylie to Unicode (Text->Text)";
|
||||
final String WYLIE_TO_TMW = "Wylie to TMW (Text->RTF)";
|
||||
final String WYLIE_TO_ACIP_TEXT = "Wylie to ACIP (Text->Text)";
|
||||
final String TMW_TO_SAME_TMW = "TMW to the same TMW (for testing only) (RTF->RTF)";
|
||||
final String ACIP_TO_UNI_TEXT = "ACIP to Unicode (Text->Text)";
|
||||
final String ACIP_TO_WYLIE_TEXT = "ACIP to Wylie (Text->Text)";
|
||||
final String ACIP_TO_TMW = "ACIP to TMW (Text->RTF)";
|
||||
final String TMW_TO_ACIP = "TMW to ACIP (RTF->RTF)";
|
||||
final String TMW_TO_ACIP_TEXT = "TMW to ACIP (RTF->Text)";
|
||||
|
@ -45,7 +47,9 @@ interface FontConverterConstants
|
|||
final String[] CHOICES = new String[] {
|
||||
WYLIE_TO_UNI_TEXT,
|
||||
WYLIE_TO_TMW,
|
||||
//WYLIE_TO_ACIP_TEXT,
|
||||
ACIP_TO_UNI_TEXT,
|
||||
ACIP_TO_WYLIE_TEXT,
|
||||
ACIP_TO_TMW,
|
||||
TMW_TO_ACIP,
|
||||
TMW_TO_ACIP_TEXT,
|
||||
|
@ -54,6 +58,7 @@ interface FontConverterConstants
|
|||
TMW_TO_UNI,
|
||||
TMW_TO_WYLIE,
|
||||
TMW_TO_WYLIE_TEXT,
|
||||
UNI_TO_WYLIE_TEXT,
|
||||
FIND_SOME_NON_TMW, // TODO(dchandler): should this be in DEBUG_CHOICES only?
|
||||
FIND_SOME_NON_TM, // TODO(dchandler): should this be in DEBUG_CHOICES only?
|
||||
FIND_ALL_NON_TMW, // TODO(dchandler): should this be in DEBUG_CHOICES only?
|
||||
|
@ -61,11 +66,11 @@ interface FontConverterConstants
|
|||
};
|
||||
|
||||
final String[] DEBUG_CHOICES = new String[] {
|
||||
UNI_TO_WYLIE_TEXT,
|
||||
TMW_TO_SAME_TMW,
|
||||
WYLIE_TO_UNI_TEXT,
|
||||
WYLIE_TO_TMW,
|
||||
ACIP_TO_UNI_TEXT,
|
||||
ACIP_TO_WYLIE_TEXT,
|
||||
ACIP_TO_TMW,
|
||||
TMW_TO_ACIP,
|
||||
TMW_TO_ACIP_TEXT,
|
||||
|
@ -74,6 +79,7 @@ interface FontConverterConstants
|
|||
TMW_TO_UNI,
|
||||
TMW_TO_WYLIE,
|
||||
TMW_TO_WYLIE_TEXT,
|
||||
UNI_TO_WYLIE_TEXT,
|
||||
FIND_SOME_NON_TMW,
|
||||
FIND_SOME_NON_TM,
|
||||
FIND_ALL_NON_TMW,
|
||||
|
@ -88,5 +94,4 @@ interface FontConverterConstants
|
|||
|
||||
// String Constants
|
||||
public final String PROGRAM_TITLE = "THDL Tibetan Converters -- featuring Jskad Technology";
|
||||
|
||||
}
|
|
@ -26,6 +26,7 @@ import java.io.InputStream;
|
|||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import javax.swing.text.SimpleAttributeSet;
|
||||
|
@ -43,6 +44,7 @@ import org.thdl.util.ThdlDebug;
|
|||
import org.thdl.util.ThdlLazyException;
|
||||
import org.thdl.util.ThdlOptions;
|
||||
import org.thdl.util.ThdlVersion;
|
||||
import org.thdl.tib.scanner.*;
|
||||
|
||||
/** TibetanConverter is a command-line utility for converting to and
|
||||
* from Tibetan Machine Web (TMW). It converts TMW to Wylie, ACIP,
|
||||
|
@ -329,9 +331,9 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
static int reallyConvert(InputStream in, PrintStream out, String ct,
|
||||
String warningLevel, boolean shortMessages,
|
||||
boolean colors) {
|
||||
if (UNI_TO_WYLIE_TEXT == ct) {
|
||||
if (UNI_TO_WYLIE_TEXT == ct || WYLIE_TO_ACIP_TEXT == ct || ACIP_TO_WYLIE_TEXT == ct) {
|
||||
try {
|
||||
String uniText;
|
||||
/*String uniText;
|
||||
{
|
||||
// TODO(dchandler): use, here and elsewhere in the
|
||||
// codebase,
|
||||
|
@ -355,7 +357,12 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
String ewtsText = Converter.convertToEwtsForComputers(uniText,
|
||||
errors);
|
||||
// TODO(dchandler): is 51 the right choice?
|
||||
return (errors.length() > 0) ? 51 : 0;
|
||||
return (errors.length() > 0) ? 51 : 0;*/
|
||||
BasicTibetanTranscriptionConverter bc = null;
|
||||
if (UNI_TO_WYLIE_TEXT == ct) bc = new BasicTibetanTranscriptionConverter(new BufferedReader(new InputStreamReader(in, "UTF16")), new PrintWriter(out));
|
||||
else bc = new BasicTibetanTranscriptionConverter(new BufferedReader(new InputStreamReader(in)), new PrintWriter(out));
|
||||
bc.run(ct);
|
||||
return 0;
|
||||
} catch (IOException e) {
|
||||
// TODO(dchandler): print it? where to?
|
||||
return 48;
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.thdl.tib.text.TibTextUtils;
|
|||
import org.thdl.tib.text.TibetanDocument;
|
||||
import org.thdl.tib.text.reverter.Converter;
|
||||
import org.thdl.tib.text.ttt.EwtsToUnicodeForXslt;
|
||||
import org.thdl.tib.input.*;
|
||||
import org.thdl.util.*;
|
||||
import java.net.*;
|
||||
import java.io.*;
|
||||
|
@ -35,12 +36,12 @@ import java.io.*;
|
|||
* @author Andres Montano
|
||||
*
|
||||
*/
|
||||
public class BasicTibetanTranscriptionConverter
|
||||
public class BasicTibetanTranscriptionConverter implements FontConverterConstants
|
||||
{
|
||||
private static BufferedReader in;
|
||||
private static PrintWriter out;
|
||||
|
||||
private static int conversionType=0;
|
||||
//private static int conversionType=0;
|
||||
private static final int ACIP_TO_WYLIE=1;
|
||||
private static final int WYLIE_TO_ACIP=2;
|
||||
private static final int UNICODE_TO_WYLIE=3;
|
||||
|
@ -81,39 +82,39 @@ public class BasicTibetanTranscriptionConverter
|
|||
aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e,
|
||||
ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O,
|
||||
a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !,
|
||||
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y
|
||||
/-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y */
|
||||
|
||||
nuevaLinea = replace(nuevaLinea, "ts", "tq");
|
||||
nuevaLinea = replace(nuevaLinea, "tz", "ts");
|
||||
nuevaLinea = replace(nuevaLinea, "tq", "tsh");
|
||||
nuevaLinea = replace(nuevaLinea, "v", "w");
|
||||
nuevaLinea = replace(nuevaLinea, "TH", "Th");
|
||||
nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh");
|
||||
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
|
||||
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
|
||||
nuevaLinea = replace(nuevaLinea, ":", "H");
|
||||
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
|
||||
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
|
||||
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
|
||||
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
|
||||
nuevaLinea = replace(nuevaLinea, "dzh", "dz+h");
|
||||
nuevaLinea = replace(nuevaLinea, "aa", "a");
|
||||
nuevaLinea = replace(nuevaLinea, "ai", "i");
|
||||
nuevaLinea = replace(nuevaLinea, "aee", "ai");
|
||||
nuevaLinea = replace(nuevaLinea, "au", "u");
|
||||
nuevaLinea = replace(nuevaLinea, "aoo", "au");
|
||||
nuevaLinea = replace(nuevaLinea, "ae", "e");
|
||||
nuevaLinea = replace(nuevaLinea, "ao", "o");
|
||||
nuevaLinea = replace(nuevaLinea, "ee", "ai");
|
||||
nuevaLinea = replace(nuevaLinea, "oo", "au");
|
||||
nuevaLinea = replace(nuevaLinea, "\'I", "\'q");
|
||||
nuevaLinea = replace(nuevaLinea, "I", "-i");
|
||||
nuevaLinea = replace(nuevaLinea, "\'q", "-I");
|
||||
nuevaLinea = replace(nuevaLinea, "\\", "?");
|
||||
nuevaLinea = replace(nuevaLinea, "`", "!");
|
||||
nuevaLinea = replace(nuevaLinea, "ga-y", "g.y");
|
||||
nuevaLinea = replace(nuevaLinea, "g-y", "g.y");
|
||||
nuevaLinea = replace(nuevaLinea, "na-y", "n+y");
|
||||
/* nuevaLinea = Manipulate.replace(nuevaLinea, "ts", "tq");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "tz", "ts");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "tq", "tsh");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "v", "w");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "TH", "Th");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "kSH", "k+Sh");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "kaSH", "k+Sh");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "SH", "Sh");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, ":", "H");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "NH", "NaH");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "dh", "d+h");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "gh", "g+h");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "bh", "b+h");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "dzh", "dz+h");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "aa", "a");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ai", "i");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "aee", "ai");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "au", "u");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "aoo", "au");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ae", "e");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ao", "o");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ee", "ai");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "oo", "au");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "\'I", "\'q");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "I", "-i");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "\'q", "-I");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "\\", "?");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "`", "!");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "ga-y", "g.y");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "g-y", "g.y");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, "na-y", "n+y");
|
||||
|
||||
len = nuevaLinea.length();
|
||||
for (i=0; i<len; i++)
|
||||
|
@ -136,9 +137,9 @@ public class BasicTibetanTranscriptionConverter
|
|||
{
|
||||
chP = nuevaLinea.charAt(i-1);
|
||||
chN = nuevaLinea.charAt(i+1);
|
||||
if (isVowel(chN))
|
||||
if (Manipulate.isVowel(chN))
|
||||
{
|
||||
if (Character.isLetter(chP) && !isVowel(chP))
|
||||
if (Character.isLetter(chP) && !Manipulate.isVowel(chP))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||
len--;
|
||||
|
@ -172,7 +173,7 @@ public class BasicTibetanTranscriptionConverter
|
|||
}
|
||||
}
|
||||
}
|
||||
nuevaLinea = replace(nuevaLinea, ",", "/");
|
||||
nuevaLinea = Manipulate.replace(nuevaLinea, ",", "/");
|
||||
|
||||
return nuevaLinea; */
|
||||
}
|
||||
|
@ -191,8 +192,8 @@ public class BasicTibetanTranscriptionConverter
|
|||
}
|
||||
return tibDoc.getACIP(new boolean[] { false });
|
||||
|
||||
/* DLC FIXME: for unknown things, return null.
|
||||
if (wylie.equals("@##")) return "#";
|
||||
/* DLC FIXME: for unknown things, return null. */
|
||||
/* if (wylie.equals("@##")) return "#";
|
||||
if (wylie.equals("@#")) return "*";
|
||||
if (wylie.equals("!")) return "`";
|
||||
if (wylie.equals("b+h")) return "BH";
|
||||
|
@ -213,7 +214,7 @@ public class BasicTibetanTranscriptionConverter
|
|||
for (j=0; j<len; j++)
|
||||
{
|
||||
i = j;
|
||||
/*ciclo:
|
||||
//ciclo:
|
||||
while(true) // para manejar excepciones; que honda!
|
||||
{
|
||||
switch(caract[i])
|
||||
|
@ -229,7 +230,7 @@ public class BasicTibetanTranscriptionConverter
|
|||
caract[i] = Character.toUpperCase(caract[i]);
|
||||
else if (Character.isUpperCase(caract[i]))
|
||||
caract[i] = Character.toLowerCase(caract[i]);
|
||||
/* break ciclo;
|
||||
// break ciclo;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -238,18 +239,18 @@ public class BasicTibetanTranscriptionConverter
|
|||
|
||||
// ahora hacer los cambios de Michael Roach
|
||||
|
||||
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
|
||||
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
|
||||
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
|
||||
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
|
||||
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
|
||||
nuevaPalabra = replace(nuevaPalabra, "/", ",");
|
||||
nuevaPalabra = replace(nuevaPalabra, "_", " ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "|", ";");
|
||||
nuevaPalabra = fixWazur(nuevaPalabra);
|
||||
return nuevaPalabra; */
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TSH", "TQQ");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TS", "TZ");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "TQQ", "TS");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "a", "'A");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "i", "'I");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "u", "'U");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "-I", "i");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "/", ",");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "_", " ");
|
||||
nuevaPalabra = Manipulate.replace(nuevaPalabra, "|", ";");
|
||||
nuevaPalabra = Manipulate.fixWazur(nuevaPalabra);
|
||||
return nuevaPalabra;*/
|
||||
}
|
||||
|
||||
/** Converts Tibetan Unicode to EWTS. */
|
||||
|
@ -291,7 +292,7 @@ public class BasicTibetanTranscriptionConverter
|
|||
|
||||
public static void printSyntax()
|
||||
{
|
||||
System.out.println("Syntax: NewBasicTibetanTranscriptionConverter [-format format-of-files | [-fi format-of-input-file] [-fo format-of-output-file]] [-it acip | wylie | utf8] [-ot acip | wylie | utf8] input-file [output-file]");
|
||||
System.out.println("Syntax: BasicTibetanTranscriptionConverter [-format format-of-files | [-fi format-of-input-file] [-fo format-of-output-file]] [-it acip | wylie | UTF16] [-ot acip | wylie | UTF16] input-file [output-file]");
|
||||
}
|
||||
|
||||
public BasicTibetanTranscriptionConverter(BufferedReader in, PrintWriter out)
|
||||
|
@ -309,7 +310,7 @@ public class BasicTibetanTranscriptionConverter
|
|||
String option;
|
||||
String formatIn = null, formatOut = null, inputTransSyst="wylie", outputTransSyst="wylie";
|
||||
boolean file = false;
|
||||
|
||||
int conversionType=0;
|
||||
|
||||
if (argNum<=currentArg)
|
||||
{
|
||||
|
@ -382,11 +383,26 @@ public class BasicTibetanTranscriptionConverter
|
|||
in = getBufferedReader (args[currentArg], formatIn);
|
||||
}
|
||||
|
||||
new BasicTibetanTranscriptionConverter(in, out).run();
|
||||
new BasicTibetanTranscriptionConverter(in, out).run(conversionType);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method was added for compatibility's sake with the FontConverterConstants interfase.
|
||||
*
|
||||
* @param conversionType
|
||||
* @throws IOException
|
||||
*/
|
||||
public void run(String conversionType) throws IOException
|
||||
{
|
||||
int conversionTypeInt=0;
|
||||
if (conversionType==ACIP_TO_WYLIE_TEXT) conversionTypeInt = ACIP_TO_WYLIE;
|
||||
if (conversionType==WYLIE_TO_ACIP_TEXT) conversionTypeInt = WYLIE_TO_ACIP;
|
||||
if (conversionType==UNI_TO_WYLIE_TEXT) conversionTypeInt = UNICODE_TO_WYLIE;
|
||||
if (conversionType==WYLIE_TO_UNI_TEXT) conversionTypeInt = WYLIE_TO_UNICODE;
|
||||
run(conversionTypeInt);
|
||||
}
|
||||
|
||||
public void run() throws Exception
|
||||
public void run(int conversionType) throws IOException
|
||||
{
|
||||
String linea, result;
|
||||
|
||||
|
|
Loading…
Reference in a new issue