/* The contents of this file are subject to the AMP Open Community License Version 1.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License on the AMP web site (http://www.tibet.iteso.mx/Guatemala/). Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific terms governing rights and limitations under the License. The Initial Developer of this software is Andres Montano Pellegrini. Portions created by Andres Montano Pellegrini are Copyright 2001 Andres Montano Pellegrini. All Rights Reserved. Contributor(s): ______________________________________. */ package org.thdl.tib.scanner; import org.thdl.tib.text.InvalidTransliterationException; import org.thdl.tib.text.TibTextUtils; import org.thdl.tib.text.TibetanDocument; import org.thdl.tib.text.reverter.Converter; import org.thdl.tib.text.ttt.EwtsToUnicodeForXslt; import org.thdl.util.*; import java.net.*; import java.io.*; /** * Wrap-up class for the various converters that the Translation Tool needs. * All conversions are done by static methods meant to be as straight-forward * and simple as possible not caring about error or warning messages. * * @author Andres Montano * */ public class BasicTibetanTranscriptionConverter { private static BufferedReader in; private static PrintWriter out; private static int conversionType=0; private static final int ACIP_TO_WYLIE=1; private static final int WYLIE_TO_ACIP=2; private static final int UNICODE_TO_WYLIE=3; private static final int WYLIE_TO_UNICODE=4; /** Converts from the Acip transliteration scheme to EWTS.*/ public static String acipToWylie(String acip) { TibetanDocument tibDoc = new TibetanDocument(); try { TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, tibDoc, 0, false); } catch (InvalidTransliterationException e) { return null; } return tibDoc.getWylie(new boolean[] { false }); /* char caract[], ch, chP, chN; String nuevaLinea; int i, len; boolean open; caract = acip.toCharArray(); len = acip.length(); for (i=0; i tsh, tz -> ts, v -> w, TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h, aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, /-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y nuevaLinea = replace(nuevaLinea, "ts", "tq"); nuevaLinea = replace(nuevaLinea, "tz", "ts"); nuevaLinea = replace(nuevaLinea, "tq", "tsh"); nuevaLinea = replace(nuevaLinea, "v", "w"); nuevaLinea = replace(nuevaLinea, "TH", "Th"); nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh"); nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh"); nuevaLinea = replace(nuevaLinea, "SH", "Sh"); nuevaLinea = replace(nuevaLinea, ":", "H"); nuevaLinea = replace(nuevaLinea, "NH", "NaH"); nuevaLinea = replace(nuevaLinea, "dh", "d+h"); nuevaLinea = replace(nuevaLinea, "gh", "g+h"); nuevaLinea = replace(nuevaLinea, "bh", "b+h"); nuevaLinea = replace(nuevaLinea, "dzh", "dz+h"); nuevaLinea = replace(nuevaLinea, "aa", "a"); nuevaLinea = replace(nuevaLinea, "ai", "i"); nuevaLinea = replace(nuevaLinea, "aee", "ai"); nuevaLinea = replace(nuevaLinea, "au", "u"); nuevaLinea = replace(nuevaLinea, "aoo", "au"); nuevaLinea = replace(nuevaLinea, "ae", "e"); nuevaLinea = replace(nuevaLinea, "ao", "o"); nuevaLinea = replace(nuevaLinea, "ee", "ai"); nuevaLinea = replace(nuevaLinea, "oo", "au"); nuevaLinea = replace(nuevaLinea, "\'I", "\'q"); nuevaLinea = replace(nuevaLinea, "I", "-i"); nuevaLinea = replace(nuevaLinea, "\'q", "-I"); nuevaLinea = replace(nuevaLinea, "\\", "?"); nuevaLinea = replace(nuevaLinea, "`", "!"); nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); nuevaLinea = replace(nuevaLinea, "na-y", "n+y"); len = nuevaLinea.length(); for (i=0; i0 && i1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2))))) { nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2); len-=2; } } } } } open = false; for (i=0; i0) { i--; break; } default: if (Character.isLowerCase(caract[i])) caract[i] = Character.toUpperCase(caract[i]); else if (Character.isUpperCase(caract[i])) caract[i] = Character.toLowerCase(caract[i]); /* break ciclo; } } } nuevaPalabra = new String(caract); // nuevaPalabra = palabra.toUpperCase(); // ahora hacer los cambios de Michael Roach nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); nuevaPalabra = replace(nuevaPalabra, "a", "'A"); nuevaPalabra = replace(nuevaPalabra, "i", "'I"); nuevaPalabra = replace(nuevaPalabra, "u", "'U"); nuevaPalabra = replace(nuevaPalabra, "-I", "i"); nuevaPalabra = replace(nuevaPalabra, "/", ","); nuevaPalabra = replace(nuevaPalabra, "_", " "); nuevaPalabra = replace(nuevaPalabra, "|", ";"); nuevaPalabra = fixWazur(nuevaPalabra); return nuevaPalabra; */ } /** Converts Tibetan Unicode to EWTS. */ public static String unicodeToWylie(String unicode) { String machineWylie; TibetanDocument tibDoc = new TibetanDocument(); StringBuffer errors = new StringBuffer(); machineWylie = Converter.convertToEwtsForComputers(unicode, errors); try { TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false); } catch (InvalidTransliterationException e) { return null; } return tibDoc.getWylie(new boolean[] { false }); } /** Converts EWTS to Tibetan Unicode. */ public static String wylieToUnicode(String wylie) { return EwtsToUnicodeForXslt.convertEwtsTo(wylie); } /** Converts EWTS to Tibetan Unicode represented in NCR. */ public static String wylieToHTMLUnicode(String wylie) { return Manipulate.UnicodeString2NCR(wylieToUnicode(wylie)); } /** Converts Tibetan Unicode represented in NCR to EWTS. */ public static String HTMLUnicodeToWylie(String unicode) { return unicodeToWylie(Manipulate.NCR2UnicodeString(unicode)); } public static void printSyntax() { System.out.println("Syntax: NewBasicTibetanTranscriptionConverter [-format format-of-files | [-fi format-of-input-file] [-fo format-of-output-file]] [-it acip | wylie | utf8] [-ot acip | wylie | utf8] input-file [output-file]"); } public BasicTibetanTranscriptionConverter(BufferedReader in, PrintWriter out) { BasicTibetanTranscriptionConverter.in = in; BasicTibetanTranscriptionConverter.out = out; } public static void main (String[] args) throws Exception { PrintWriter out; BufferedReader in=null; int argNum = args.length, currentArg=0; String option; String formatIn = null, formatOut = null, inputTransSyst="wylie", outputTransSyst="wylie"; boolean file = false; if (argNum<=currentArg) { printSyntax(); return; } while (args[currentArg].charAt(0)=='-') { option = args[currentArg++].substring(1); if (option.equals("format")) { formatIn = formatOut = args[currentArg]; } else if (option.equals("fi")) { formatIn = args[currentArg]; } else if (option.equals("fo")) { formatOut = args[currentArg]; } else if (option.equals("it")) { inputTransSyst = args[currentArg]; } else if (option.equals("ot")) { outputTransSyst = args[currentArg]; } currentArg++; } if (!inputTransSyst.equals(outputTransSyst)) { if (inputTransSyst.equals("wylie")) { if (outputTransSyst.equals("acip")) conversionType = WYLIE_TO_ACIP; else conversionType = WYLIE_TO_UNICODE; } else if (inputTransSyst.equals("acip")) conversionType = ACIP_TO_WYLIE; else conversionType = UNICODE_TO_WYLIE; } switch (args.length-currentArg) { case 0: if (formatIn != null) { System.out.println("Syntax error: input file name expected."); return; } out = new PrintWriter(System.out); in = new BufferedReader(new InputStreamReader(System.in)); break; case 1: if (formatOut != null) { System.out.println("Syntax error: output file name expected."); return; } out = new PrintWriter(System.out); file = true; break; default: if (formatOut != null) out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(args[currentArg + 1]), formatOut)); else out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(args[currentArg + 1]))); file = true; } if (file) { in = getBufferedReader (args[currentArg], formatIn); } new BasicTibetanTranscriptionConverter(in, out).run(); } public void run() throws Exception { String linea, result; while ((linea=in.readLine())!=null) { switch(conversionType) { case ACIP_TO_WYLIE: result = acipToWylie(linea); break; case WYLIE_TO_ACIP: result = wylieToAcip(linea); break; case UNICODE_TO_WYLIE: result = unicodeToWylie(linea); break; case WYLIE_TO_UNICODE: result = wylieToUnicode(linea); break; default: result = linea; } if (result!=null) out.println(result); } out.flush(); } public static BufferedReader getBufferedReader(String s, String format) throws Exception { InputStream is; if (s.indexOf("http://") >= 0) is = new BufferedInputStream((new URL(s)).openStream()); else is = new FileInputStream(s); if (format==null) return new BufferedReader(new InputStreamReader(is)); else return new BufferedReader(new InputStreamReader(is, format)); } }