From e0e125c76fa936fc01f57490f2764865d14525e7 Mon Sep 17 00:00:00 2001 From: amontano Date: Wed, 9 Oct 2002 00:11:15 +0000 Subject: [PATCH] includes static methods used by BinaryFileGenerator --- source/org/thdl/tib/scanner/Manipulate.java | 333 ++++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100644 source/org/thdl/tib/scanner/Manipulate.java diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java new file mode 100644 index 0000000..d509011 --- /dev/null +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -0,0 +1,333 @@ +/* +The contents of this file are subject to the AMP Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the AMP web site +(http://www.tibet.iteso.mx/Guatemala/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is Andres Montano Pellegrini. Portions +created by Andres Montano Pellegrini are Copyright 2001 Andres Montano +Pellegrini. All Rights Reserved. + +Contributor(s): ______________________________________. +*/ +package org.thdl.tib.scanner; + + +import java.io.*; + +/** Takes the output of ConsoleScannerFilter + (in RY format), converts the Wylie to Acip + and displays the result in csv format. + + @author Andrés Montano Pellegrini +*/ + +public class Manipulate +{ + + public static String replace(String linea, String origSub, String newSub) + { + int pos, lenOrig = origSub.length(); + while ((pos = linea.indexOf(origSub))!=-1) + { + linea = linea.substring(0, pos).concat(newSub).concat(linea.substring(pos+lenOrig)); + } + return linea; + } + + public static boolean isVowel (char ch) + { + ch = Character.toLowerCase(ch); + return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; + } + + public static String wylieToAcip(String palabra) + { + char []caract; + int i, j, len; + String nuevaPalabra; + + caract = palabra.toCharArray(); + len = palabra.length(); + for (j=0; j0) + { + i--; + break; + } + default:*/ + if (Character.isLowerCase(caract[i])) + caract[i] = Character.toUpperCase(caract[i]); + else if (Character.isUpperCase(caract[i])) + caract[i] = Character.toLowerCase(caract[i]); + /* break ciclo; + } + }*/ + } + nuevaPalabra = new String(caract); + // nuevaPalabra = palabra.toUpperCase(); + + // ahora hacer los cambios de Michael Roach + + nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); + nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); + nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); + nuevaPalabra = fixWazur(nuevaPalabra); + return nuevaPalabra; + } + + public static String acipToWylie(String linea) + { + char caract[], ch, chP, chN; + String nuevaLinea; + int i, len; + boolean open; + + caract = linea.toCharArray(); + len = linea.length(); + for (i=0; i tsh, tz -> ts, v -> w, + TH -> Th, SH -> Sh, : -> H, dh -> d+h, gh -> g+h, bh -> b+h, dzh -> dz+h, + kSH -> k+Sh, aa -> a, a'a -> A, ai->i, aee ->ai, au->u, aoo->au, ae->e, + ao->o, ee->ai, oo->au, 'I->-I I->-i, a'i->I, a'u->U, a'e->E, a'o->O, + a'i->I, a'u->U, a'e->E, a'o->O, ,->/, # -> @##, * -> @#, \ -> ?, ` -> !, + /-/ -> (-), ga-y -> g.y, g-y -> g.y */ + + nuevaLinea = replace(nuevaLinea, "ts", "tq"); + nuevaLinea = replace(nuevaLinea, "tz", "ts"); + nuevaLinea = replace(nuevaLinea, "tq", "tsh"); + nuevaLinea = replace(nuevaLinea, "v", "w"); + nuevaLinea = replace(nuevaLinea, "TH", "Th"); + nuevaLinea = replace(nuevaLinea, "SH", "Sh"); + nuevaLinea = replace(nuevaLinea, ":", "H"); + nuevaLinea = replace(nuevaLinea, "dh", "d+h"); + nuevaLinea = replace(nuevaLinea, "gh", "g+h"); + nuevaLinea = replace(nuevaLinea, "bh", "b+h"); + nuevaLinea = replace(nuevaLinea, "dzh", "dz+h"); + nuevaLinea = replace(nuevaLinea, "kSH", "k+Sh"); + nuevaLinea = replace(nuevaLinea, "aa", "a"); + nuevaLinea = replace(nuevaLinea, "ai", "i"); + nuevaLinea = replace(nuevaLinea, "aee", "ai"); + nuevaLinea = replace(nuevaLinea, "au", "u"); + nuevaLinea = replace(nuevaLinea, "aoo", "au"); + nuevaLinea = replace(nuevaLinea, "ae", "e"); + nuevaLinea = replace(nuevaLinea, "ao", "o"); + nuevaLinea = replace(nuevaLinea, "ee", "ai"); + nuevaLinea = replace(nuevaLinea, "oo", "au"); + nuevaLinea = replace(nuevaLinea, "\'I", "\'q"); + nuevaLinea = replace(nuevaLinea, "I", "-i"); + nuevaLinea = replace(nuevaLinea, "\'q", "-I"); + nuevaLinea = replace(nuevaLinea, "\\", "?"); + nuevaLinea = replace(nuevaLinea, "`", "!"); + nuevaLinea = replace(nuevaLinea, "ga-y", "g.y"); + nuevaLinea = replace(nuevaLinea, "g-y", "g.y"); + + len = nuevaLinea.length(); + for (i=0; i0 && i dict-dest + * + * arch-palabras es usado solo cuando deseamos las palabras cambiadas + * a otro archivo. + */ + + public static void main (String[] args) throws Exception + { + String linea, palabra, definicion, nuevaPalabra; + int marker; + PrintWriter psPalabras = null; + + BufferedReader keyb = new BufferedReader(new InputStreamReader(System.in)); + + if (args.length==1) + psPalabras = new PrintWriter(new FileOutputStream(args[0])); + + while ((linea=keyb.readLine())!=null) + { + if (linea.trim().equals("")) continue; + marker = linea.indexOf('-'); + if (marker<0) // linea tiene error + { + palabra = linea; + definicion = ""; + } + else + { + palabra = linea.substring(0, marker).trim(); + definicion = linea.substring(marker+1).trim(); + } + + nuevaPalabra = wylieToAcip(palabra); + + if (psPalabras!=null) + psPalabras.println(nuevaPalabra); + else System.out.print(nuevaPalabra + '\t'); + if (definicion.equals("")) + System.out.println(palabra); + else + System.out.println(palabra + '\t' + definicion); + } + if (psPalabras!=null) psPalabras.flush(); + } + + /** Returns the base letter of a syllable. Does not include the vowel! + Ignoring cases for now. */ + public static String getBaseLetter (String sil) + { + sil = sil.toLowerCase(); + + int i=0, len=sil.length(); + char ch, ch2; + + while (!isVowel(sil.charAt(i))) i++; + if (i==0) return ""; + + i--; + if (i==-1) return ""; + + if (sil.charAt(i)=='-') i--; + + ch = sil.charAt(i); + + // check to see if it is a subscript (y, r, l, w) + if (i>0) + { + switch (ch) + { + case 'r': case 'l': case 'w': i--; + break; + case 'y': + ch2 = sil.charAt(i-1); + switch (ch2) + { + case '.': return "y"; + case 'n': return "ny"; + default: i--; + } + } + } + if (i==0) return sil.substring(i,i+1); + ch = sil.charAt(i); + ch2 = sil.charAt(i-1); + + switch(ch) + { + case 'h': + switch (ch2) + { + case 'k': case 'c': case 't': case 'p': case 'z': + return sil.substring(i-1,i+1); + case 's': + if (i-2>=0 && sil.charAt(i-2)=='t') return "tsh"; + else return "sh"; + default: return "h"; + } + case 's': + if (ch2=='t') return "ts"; + else return "s"; + case 'g': + if (ch2=='n') return "ng"; + else return "g"; + case 'z': + if (ch2=='d') return "dz"; + else return "z"; + } + return sil.substring(i,i+1); + } + +}