From 0c891ec96c7ca64be1b6f13b8b5d82554e610162 Mon Sep 17 00:00:00 2001 From: amontano Date: Mon, 24 Apr 2006 19:19:04 +0000 Subject: [PATCH] The org.thdl.tib.scanner.Manipulate class was originally meant as a grab-bag of methods processing strings representing tibetan wylie. It doesn't make sense to leave there the wrap-up methods for converting from and to the various transcription schemes. Moved them to BasicTibetanTranscriptionConverter and updated all classes that point to them. --- source/org/thdl/tib/scanner/AcipToWylie.java | 2 +- .../BasicTibetanTranscriptionConverter.java | 278 ++++++++++++++++++ source/org/thdl/tib/scanner/Manipulate.java | 226 -------------- .../thdl/tib/scanner/OnLineScannerFilter.java | 4 +- .../org/thdl/tib/scanner/StrictDuffPane.java | 7 +- source/org/thdl/tib/scanner/SwingWord.java | 7 +- .../org/thdl/tib/scanner/TibetanScanner.java | 6 +- .../org/thdl/tib/text/TibetanMachineWeb.java | 3 +- 8 files changed, 290 insertions(+), 243 deletions(-) create mode 100644 source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java diff --git a/source/org/thdl/tib/scanner/AcipToWylie.java b/source/org/thdl/tib/scanner/AcipToWylie.java index 09ed158..61d8d6a 100644 --- a/source/org/thdl/tib/scanner/AcipToWylie.java +++ b/source/org/thdl/tib/scanner/AcipToWylie.java @@ -99,7 +99,7 @@ public class AcipToWylie String linea; while ((linea=in.readLine())!=null) { - out.println(Manipulate.acipToWylie(linea)); + out.println(BasicTibetanTranscriptionConverter.acipToWylie(linea)); } out.flush(); } diff --git a/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java b/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java new file mode 100644 index 0000000..b7be49f --- /dev/null +++ b/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java @@ -0,0 +1,278 @@ +/* +The contents of this file are subject to the AMP Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance 
+with the License. You may obtain a copy of the License on the AMP web site +(http://www.tibet.iteso.mx/Guatemala/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is Andres Montano Pellegrini. Portions +created by Andres Montano Pellegrini are Copyright 2001 Andres Montano +Pellegrini. All Rights Reserved. + +Contributor(s): ______________________________________. +*/ +package org.thdl.tib.scanner; + +import org.thdl.tib.text.InvalidTransliterationException; +import org.thdl.tib.text.TibTextUtils; +import org.thdl.tib.text.TibetanDocument; +import org.thdl.tib.text.reverter.Converter; +import org.thdl.tib.text.ttt.EwtsToUnicodeForXslt; + +/** + * Wrap-up class for the various converters that the Translation Tool needs. + * All conversions are done by static methods meant to be as straight-forward + * and simple as possible not caring about error or warning messages. 
+ * + * @author Andres Montano + * + */ +public class BasicTibetanTranscriptionConverter { + + /** Converts from the Acip transliteration scheme to EWTS.*/ + public static String acipToWylie(String acip) + { + TibetanDocument tibDoc = new TibetanDocument(); + try + { + TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, tibDoc, 0, false); + } + catch (InvalidTransliterationException e) + { + return null; + } + return tibDoc.getWylie(new boolean[] { false }); + + /* char caract[], ch, chP, chN; + String nuevaLinea; + int i, len; + boolean open; + + caract = acip.toCharArray(); + len = acip.length(); + for (i=0; i tsh, tz -> ts, v -> w, + TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h, + aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, + ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, + a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, + /-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y + + nuevaLinea = replace(nuevaLinea, "ts", "tq"); + nuevaLinea = replace(nuevaLinea, "tz", "ts"); + nuevaLinea = replace(nuevaLinea, "tq", "tsh"); + nuevaLinea = replace(nuevaLinea, "v", "w"); + nuevaLinea = replace(nuevaLinea, "TH", "Th"); + nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh"); + nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh"); + nuevaLinea = replace(nuevaLinea, "SH", "Sh"); + nuevaLinea = replace(nuevaLinea, ":", "H"); + nuevaLinea = replace(nuevaLinea, "NH", "NaH"); + nuevaLinea = replace(nuevaLinea, "dh", "d+h"); + nuevaLinea = replace(nuevaLinea, "gh", "g+h"); + nuevaLinea = replace(nuevaLinea, "bh", "b+h"); + nuevaLinea = replace(nuevaLinea, "dzh", "dz+h"); + nuevaLinea = replace(nuevaLinea, "aa", "a"); + nuevaLinea = replace(nuevaLinea, "ai", "i"); + nuevaLinea = replace(nuevaLinea, "aee", "ai"); + nuevaLinea = replace(nuevaLinea, "au", "u"); + nuevaLinea = replace(nuevaLinea, "aoo", "au"); + nuevaLinea = replace(nuevaLinea, "ae", "e"); + nuevaLinea = 
replace(nuevaLinea, "ao", "o"); + nuevaLinea = replace(nuevaLinea, "ee", "ai"); + nuevaLinea = replace(nuevaLinea, "oo", "au"); + nuevaLinea = replace(nuevaLinea, "\'I", "\'q"); + nuevaLinea = replace(nuevaLinea, "I", "-i"); + nuevaLinea = replace(nuevaLinea, "\'q", "-I"); + nuevaLinea = replace(nuevaLinea, "\\", "?"); + nuevaLinea = replace(nuevaLinea, "`", "!"); + nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); + nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); + nuevaLinea = replace(nuevaLinea, "na-y", "n+y"); + + len = nuevaLinea.length(); + for (i=0; i0 && i1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2))))) + { + nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2); + len-=2; + } + } + } + } + } + + open = false; + for (i=0; i0) + { + i--; + break; + } + default: + if (Character.isLowerCase(caract[i])) + caract[i] = Character.toUpperCase(caract[i]); + else if (Character.isUpperCase(caract[i])) + caract[i] = Character.toLowerCase(caract[i]); + /* break ciclo; + } + } + } + nuevaPalabra = new String(caract); + // nuevaPalabra = palabra.toUpperCase(); + + // ahora hacer los cambios de Michael Roach + + nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); + nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); + nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); + nuevaPalabra = replace(nuevaPalabra, "a", "'A"); + nuevaPalabra = replace(nuevaPalabra, "i", "'I"); + nuevaPalabra = replace(nuevaPalabra, "u", "'U"); + nuevaPalabra = replace(nuevaPalabra, "-I", "i"); + nuevaPalabra = replace(nuevaPalabra, "/", ","); + nuevaPalabra = replace(nuevaPalabra, "_", " "); + nuevaPalabra = replace(nuevaPalabra, "|", ";"); + nuevaPalabra = fixWazur(nuevaPalabra); + return nuevaPalabra; */ + } + + /** Converts Tibetan Unicode to EWTS. 
*/ + public static String unicodeToWylie(String unicode) + { + String machineWylie; + TibetanDocument tibDoc = new TibetanDocument(); + StringBuffer errors = new StringBuffer(); + + machineWylie = Converter.convertToEwtsForComputers(unicode, errors); + try + { + TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false); + } + catch (InvalidTransliterationException e) + { + return null; + } + return tibDoc.getWylie(new boolean[] { false }); + } + + /** Converts EWTS to Tibetan Unicode. */ + public static String wylieToUnicode(String wylie) + { + return EwtsToUnicodeForXslt.convertEwtsTo(wylie); + } + + /** Converts EWTS to Tibetan Unicode represented in NCR. */ + public static String wylieToHTMLUnicode(String wylie) + { + return Manipulate.UnicodeString2NCR(wylieToUnicode(wylie)); + } + + /** Converts Tibetan Unicode represented in NCR to EWTS. */ + public static String HTMLUnicodeToWylie(String unicode) + { + return unicodeToWylie(Manipulate.NCR2UnicodeString(unicode)); + } +} \ No newline at end of file diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java index f5f1574..64e8299 100644 --- a/source/org/thdl/tib/scanner/Manipulate.java +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -17,10 +17,6 @@ Contributor(s): ______________________________________. */ package org.thdl.tib.scanner; -import org.thdl.tib.text.*; -import org.thdl.tib.text.reverter.*; - - /** Miscelaneous static methods for the manipulation of Tibetan text. 
@author Andrés Montano Pellegrini @@ -376,228 +372,6 @@ public class Manipulate if (psPalabras!=null) psPalabras.flush(); }*/ - public static String acipToWylie(String acip) - { - TibetanDocument tibDoc = new TibetanDocument(); - try - { - TibTextUtils.insertTibetanMachineWebForTranslit(false, acip, tibDoc, 0, false); - } - catch (InvalidTransliterationException e) - { - return null; - } - return tibDoc.getWylie(new boolean[] { false }); - - /* char caract[], ch, chP, chN; - String nuevaLinea; - int i, len; - boolean open; - - caract = acip.toCharArray(); - len = acip.length(); - for (i=0; i tsh, tz -> ts, v -> w, - TH -> Th, kSH, kaSH -> k+Sh, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h, - aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, - ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, - a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, - /-/ -> (-), ga-y -> g.y, g-y -> g.y, na-y -> n+y - - nuevaLinea = replace(nuevaLinea, "ts", "tq"); - nuevaLinea = replace(nuevaLinea, "tz", "ts"); - nuevaLinea = replace(nuevaLinea, "tq", "tsh"); - nuevaLinea = replace(nuevaLinea, "v", "w"); - nuevaLinea = replace(nuevaLinea, "TH", "Th"); - nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh"); - nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh"); - nuevaLinea = replace(nuevaLinea, "SH", "Sh"); - nuevaLinea = replace(nuevaLinea, ":", "H"); - nuevaLinea = replace(nuevaLinea, "NH", "NaH"); - nuevaLinea = replace(nuevaLinea, "dh", "d+h"); - nuevaLinea = replace(nuevaLinea, "gh", "g+h"); - nuevaLinea = replace(nuevaLinea, "bh", "b+h"); - nuevaLinea = replace(nuevaLinea, "dzh", "dz+h"); - nuevaLinea = replace(nuevaLinea, "aa", "a"); - nuevaLinea = replace(nuevaLinea, "ai", "i"); - nuevaLinea = replace(nuevaLinea, "aee", "ai"); - nuevaLinea = replace(nuevaLinea, "au", "u"); - nuevaLinea = replace(nuevaLinea, "aoo", "au"); - nuevaLinea = replace(nuevaLinea, "ae", "e"); - nuevaLinea = replace(nuevaLinea, "ao", "o"); - 
nuevaLinea = replace(nuevaLinea, "ee", "ai"); - nuevaLinea = replace(nuevaLinea, "oo", "au"); - nuevaLinea = replace(nuevaLinea, "\'I", "\'q"); - nuevaLinea = replace(nuevaLinea, "I", "-i"); - nuevaLinea = replace(nuevaLinea, "\'q", "-I"); - nuevaLinea = replace(nuevaLinea, "\\", "?"); - nuevaLinea = replace(nuevaLinea, "`", "!"); - nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); - nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); - nuevaLinea = replace(nuevaLinea, "na-y", "n+y"); - - len = nuevaLinea.length(); - for (i=0; i0 && i1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2))))) - { - nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2); - len-=2; - } - } - } - } - } - - open = false; - for (i=0; i0) - { - i--; - break; - } - default: - if (Character.isLowerCase(caract[i])) - caract[i] = Character.toUpperCase(caract[i]); - else if (Character.isUpperCase(caract[i])) - caract[i] = Character.toLowerCase(caract[i]); - /* break ciclo; - } - } - } - nuevaPalabra = new String(caract); - // nuevaPalabra = palabra.toUpperCase(); - - // ahora hacer los cambios de Michael Roach - - nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); - nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); - nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); - nuevaPalabra = replace(nuevaPalabra, "a", "'A"); - nuevaPalabra = replace(nuevaPalabra, "i", "'I"); - nuevaPalabra = replace(nuevaPalabra, "u", "'U"); - nuevaPalabra = replace(nuevaPalabra, "-I", "i"); - nuevaPalabra = replace(nuevaPalabra, "/", ","); - nuevaPalabra = replace(nuevaPalabra, "_", " "); - nuevaPalabra = replace(nuevaPalabra, "|", ";"); - nuevaPalabra = fixWazur(nuevaPalabra); - return nuevaPalabra; */ - } - - public static String unicodeToWylie(String unicode) - { - String machineWylie; - TibetanDocument tibDoc = new TibetanDocument(); - StringBuffer errors = new StringBuffer(); - - machineWylie = 
Converter.convertToEwtsForComputers(unicode, errors); - try - { - TibTextUtils.insertTibetanMachineWebForTranslit(true, machineWylie, tibDoc, 0, false); - } - catch (InvalidTransliterationException e) - { - return null; - } - return tibDoc.getWylie(new boolean[] { false }); - } - /** From http://www.i18nfaq.com/2005/07/how-do-i-convert-ncr-format-to-java.html */ public static String NCR2UnicodeString(String str) { diff --git a/source/org/thdl/tib/scanner/OnLineScannerFilter.java b/source/org/thdl/tib/scanner/OnLineScannerFilter.java index e2ecda9..9a7cf31 100644 --- a/source/org/thdl/tib/scanner/OnLineScannerFilter.java +++ b/source/org/thdl/tib/scanner/OnLineScannerFilter.java @@ -317,8 +317,8 @@ public class OnLineScannerFilter extends HttpServlet } */ scanner.clearTokens(); in = Manipulate.NCR2UnicodeString(in); - if (Manipulate.guessIfUnicode(in)) in = Manipulate.unicodeToWylie(in); - else if (Manipulate.guessIfAcip(in)) in = Manipulate.acipToWylie(in); + if (Manipulate.guessIfUnicode(in)) in = BasicTibetanTranscriptionConverter.unicodeToWylie(in); + else if (Manipulate.guessIfAcip(in)) in = BasicTibetanTranscriptionConverter.acipToWylie(in); scanner.scanBody(in); scanner.finishUp(); printText(pw, tibetan); diff --git a/source/org/thdl/tib/scanner/StrictDuffPane.java b/source/org/thdl/tib/scanner/StrictDuffPane.java index 747e469..61e87d7 100644 --- a/source/org/thdl/tib/scanner/StrictDuffPane.java +++ b/source/org/thdl/tib/scanner/StrictDuffPane.java @@ -142,11 +142,8 @@ public class StrictDuffPane extends DuffPane if (pasteAsString) { String data = (String)contents.getTransferData(DataFlavor.stringFlavor); - if (Manipulate.guessIfUnicode(data)) - { - StringBuffer errors = new StringBuffer(); - data = Converter.convertToEwtsForComputers(data, errors); - } else if (Manipulate.guessIfAcip(data)) data = Manipulate.acipToWylie(data); + if (Manipulate.guessIfUnicode(data)) data = BasicTibetanTranscriptionConverter.unicodeToWylie(data); + else if 
(Manipulate.guessIfAcip(data)) data = BasicTibetanTranscriptionConverter.acipToWylie(data); toTibetanMachineWeb(data, offset); } diff --git a/source/org/thdl/tib/scanner/SwingWord.java b/source/org/thdl/tib/scanner/SwingWord.java index a880874..4ee34de 100644 --- a/source/org/thdl/tib/scanner/SwingWord.java +++ b/source/org/thdl/tib/scanner/SwingWord.java @@ -20,9 +20,6 @@ Contributor(s): ______________________________________. to store the dictionary. */ package org.thdl.tib.scanner; -//import org.thdl.tib.text.TibetanHTML; -import org.thdl.tib.text.ttt.*; - /** Tibetan word with its corresponding definitions. @author Andrés Montano Pellegrini @@ -62,7 +59,7 @@ public class SwingWord extends Word try { // localWord = TibetanHTML.getHTML(super.token + " "); - localWord = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(super.token + " ")); + localWord = BasicTibetanTranscriptionConverter.wylieToHTMLUnicode(super.token + " "); } catch (Exception e) { @@ -92,7 +89,7 @@ public class SwingWord extends Word { try { - result = Manipulate.UnicodeString2NCR(EwtsToUnicodeForXslt.convertEwtsTo(localWord + " ")); + result = BasicTibetanTranscriptionConverter.wylieToHTMLUnicode(localWord + " "); className = " class = \"tib\""; } catch (Exception e) diff --git a/source/org/thdl/tib/scanner/TibetanScanner.java b/source/org/thdl/tib/scanner/TibetanScanner.java index d4d439b..215cd38 100644 --- a/source/org/thdl/tib/scanner/TibetanScanner.java +++ b/source/org/thdl/tib/scanner/TibetanScanner.java @@ -28,9 +28,9 @@ import org.thdl.util.ThdlVersion; public abstract class TibetanScanner { public static final String version = "The Tibetan to English Translation Tool, version 3.3.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". 
"; - public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2005 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; - public static final String copyrightASCII="Copyright 2000-2005 by Andres Montano Pellegrini, all rights reserved."; - public static final String copyrightHTML="
" + version + "Copyright © 2000-2005 by Andrés Montano Pellegrini.
All rights reserved.
"; + public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2006 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; + public static final String copyrightASCII="Copyright 2000-2006 by Andres Montano Pellegrini, all rights reserved."; + public static final String copyrightHTML="
" + version + "Copyright © 2000-2006 by Andrés Montano Pellegrini.
All rights reserved.
"; public static final int NORMAL_MODE=1; public static final int DEBUG_MODE=2; diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index 3c7366c..6a1fc33 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -39,6 +39,7 @@ import org.thdl.tib.text.tshegbar.UnicodeUtils; import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlOptions; import org.thdl.util.Trie; +import org.thdl.tib.scanner.BasicTibetanTranscriptionConverter; /** * Interfaces between Extended Wylie and the TibetanMachineWeb fonts. @@ -2016,7 +2017,7 @@ private static String acipForGlyph(String hashKey) { else // else we are not be able to use it because it's not smart // about stacks (e.g., W+W) - return org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey); + return BasicTibetanTranscriptionConverter.wylieToAcip(hashKey); } /** Error that appears in a document when some TMW cannot be