Fixed importation errors for the translation tool.
This commit is contained in:
parent
4b4787411b
commit
686756116f
4 changed files with 199 additions and 118 deletions
|
@ -80,10 +80,6 @@ class AcipToTab
|
|||
if (temp.length()>0)
|
||||
{
|
||||
currentPage=Integer.parseInt(temp);
|
||||
if (currentPage==3141)
|
||||
{
|
||||
System.out.println("Hello!");
|
||||
}
|
||||
}
|
||||
if (marker<len)
|
||||
{
|
||||
|
@ -151,7 +147,7 @@ class AcipToTab
|
|||
while (marker<len)
|
||||
{
|
||||
ch = entrada.charAt(marker);
|
||||
if (ch==' ' || ch=='/') break;
|
||||
if (Manipulate.isEndOfSyllableMark(ch) || Manipulate.isEndOfParagraphMark(ch)) break;
|
||||
marker++;
|
||||
}
|
||||
|
||||
|
@ -184,50 +180,56 @@ class AcipToTab
|
|||
while (marker < len)
|
||||
{
|
||||
ch = entrada.charAt(marker);
|
||||
switch(ch)
|
||||
{
|
||||
case '/':
|
||||
|
||||
if (Manipulate.isEndOfParagraphMark(ch))
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker+1;
|
||||
}
|
||||
else if (Manipulate.isEndOfSyllableMark(ch))
|
||||
{
|
||||
if (marker+1<len && Manipulate.isEndOfSyllableMark(entrada.charAt(marker+1))) // verify " "
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker+1;
|
||||
break;
|
||||
case '(': case '<':
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
break;
|
||||
case 'g': // verify "g "
|
||||
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && entrada.charAt(marker+1)==' ')
|
||||
{
|
||||
temp = entrada.substring(0, marker+1);
|
||||
if (!lastWeirdDefiniendum.startsWith(temp))
|
||||
marker2=++marker;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(ch)
|
||||
{
|
||||
case '(': case '<':
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
break;
|
||||
case 'g': // verify "g "
|
||||
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && Manipulate.isEndOfSyllableMark(entrada.charAt(marker+1)))
|
||||
{
|
||||
temp = entrada.substring(0, marker+1);
|
||||
if (!lastWeirdDefiniendum.startsWith(temp))
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=++marker;
|
||||
lastWeirdDefiniendum=temp;
|
||||
//n++;
|
||||
// out.println(currentPage + ": " + entrada);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=++marker;
|
||||
lastWeirdDefiniendum=temp;
|
||||
//n++;
|
||||
// out.println(currentPage + ": " + entrada);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ' ': // verify " "
|
||||
if (marker+1<len && entrada.charAt(marker+1)==' ')
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=++marker;
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (Character.isDigit(ch))
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
}
|
||||
marker2=marker;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (Character.isDigit(ch))
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (markerNotFound) marker++;
|
||||
|
|
|
@ -182,7 +182,14 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
|
|||
private BinaryFileGenerator(String sil, String def, int numDef)
|
||||
{
|
||||
super();
|
||||
int marker = sil.indexOf(" ");
|
||||
int marker;
|
||||
while (true)
|
||||
{
|
||||
marker = Manipulate.indexOfExtendedEndOfSyllableMark(sil);
|
||||
if (marker==0) sil = sil.substring(1);
|
||||
else if (marker==sil.length()-1) sil = sil.substring(0,sil.length()-1);
|
||||
else break;
|
||||
}
|
||||
|
||||
// fix for updates
|
||||
this.sourceDef = new ByteDictionarySource();
|
||||
|
@ -310,7 +317,7 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
|
|||
while (marker<len)
|
||||
{
|
||||
ch = entrada.charAt(marker);
|
||||
if (ch==' ' || ch=='/') break;
|
||||
if (Manipulate.isEndOfSyllableMark(ch) || Manipulate.isEndOfParagraphMark(ch)) break;
|
||||
marker++;
|
||||
}
|
||||
|
||||
|
@ -333,48 +340,54 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
|
|||
while (marker < len)
|
||||
{
|
||||
ch = entrada.charAt(marker);
|
||||
switch(ch)
|
||||
{
|
||||
case '/':
|
||||
|
||||
if (Manipulate.isEndOfParagraphMark(ch))
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker+1;
|
||||
}
|
||||
else if (Manipulate.isEndOfSyllableMark(ch))
|
||||
{
|
||||
if (marker+1<len && Manipulate.isEndOfSyllableMark(entrada.charAt(marker+1))) // verify " "
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker+1;
|
||||
break;
|
||||
case '(': case '<':
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
break;
|
||||
case 'g': // verify "g "
|
||||
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && entrada.charAt(marker+1)==' ')
|
||||
{
|
||||
temp = entrada.substring(0, marker+1);
|
||||
if (!lastWeirdDefiniendum.startsWith(temp))
|
||||
{
|
||||
marker2=++marker;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(ch)
|
||||
{
|
||||
case '(': case '<':
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
break;
|
||||
case 'g': // verify "g "
|
||||
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && Manipulate.isEndOfSyllableMark(entrada.charAt(marker+1)))
|
||||
{
|
||||
temp = entrada.substring(0, marker+1);
|
||||
if (!lastWeirdDefiniendum.startsWith(temp))
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=++marker;
|
||||
lastWeirdDefiniendum=temp;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=++marker;
|
||||
lastWeirdDefiniendum=temp;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ' ': // verify " "
|
||||
if (marker+1<len && entrada.charAt(marker+1)==' ')
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=++marker;
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (Character.isDigit(ch))
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
}
|
||||
marker2=marker;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (Character.isDigit(ch))
|
||||
{
|
||||
markerNotFound=false;
|
||||
marker2=marker;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (markerNotFound) marker++;
|
||||
else break;
|
||||
|
@ -486,16 +499,20 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
|
|||
default:
|
||||
marker = entrada.indexOf(delimiter);
|
||||
}
|
||||
if (marker<0)
|
||||
if (marker<=0)
|
||||
{
|
||||
System.out.println("Error loading line " + currentLine + ", in file " + archivo + ":");
|
||||
System.out.println(entrada);
|
||||
}
|
||||
else
|
||||
{
|
||||
marker2 = Manipulate.indexOfBracketMarks(entrada.substring(0,marker));
|
||||
if (marker2>0) marker = marker2;
|
||||
|
||||
s1 = Manipulate.deleteQuotes(entrada.substring(0,marker).trim());
|
||||
s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length()).trim());
|
||||
if (!s2.equals(""))
|
||||
s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length())).trim();
|
||||
|
||||
if (Manipulate.isMeaningful(s2))
|
||||
{
|
||||
if (currentLine%5000==0)
|
||||
{
|
||||
|
@ -527,8 +544,16 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
|
|||
Link link, newLink;
|
||||
BinaryFileGenerator ultimo;
|
||||
String firstSillable;
|
||||
int marker = word.indexOf(" "), comp;
|
||||
int marker, comp;
|
||||
|
||||
while (true)
|
||||
{
|
||||
marker = Manipulate.indexOfExtendedEndOfSyllableMark(word);
|
||||
if (marker==0) word = word.substring(1);
|
||||
else if (marker==word.length()-1) word = word.substring(0,word.length()-1);
|
||||
else break;
|
||||
}
|
||||
|
||||
if (marker<0)
|
||||
firstSillable = word;
|
||||
else firstSillable = word.substring(0,marker);
|
||||
|
|
|
@ -35,8 +35,6 @@ public class LocalTibetanScanner extends TibetanScanner
|
|||
private SyllableListTree raiz, silActual, lastCompSil, silAnterior;
|
||||
private String wordActual, lastCompWord;
|
||||
private Vector floatingSil;
|
||||
private static String endOfParagraphMarks = "/;|!:[]^@#$%=<>(){}";
|
||||
private static String endOfSyllableMarks = " _\t";
|
||||
|
||||
static
|
||||
{
|
||||
|
@ -295,11 +293,6 @@ public class LocalTibetanScanner extends TibetanScanner
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Tells whether a character is one of this scanner's end-of-syllable marks.
 *
 * @param ch the character (as an int code) to test
 * @return true when ch occurs in the endOfSyllableMarks string, false otherwise
 */
private boolean isEndOfSyllable(int ch)
|
||||
{
|
||||
return (endOfSyllableMarks.indexOf(ch)>-1);
|
||||
}
|
||||
|
||||
public void scanLine(String linea)
|
||||
{
|
||||
int init = 0, fin;
|
||||
|
@ -325,7 +318,7 @@ outAHere:
|
|||
if (init>=linea.length())
|
||||
break outAHere;
|
||||
ch = linea.charAt(init);
|
||||
if (endOfParagraphMarks.indexOf(ch)>=0)
|
||||
if (Manipulate.isPunctuationMark(ch))
|
||||
{
|
||||
if (doNotFinishUp)
|
||||
{
|
||||
|
@ -334,7 +327,7 @@ outAHere:
|
|||
}
|
||||
wordList.addLast(new PunctuationMark(ch));
|
||||
}
|
||||
else if (endOfSyllableMarks.indexOf(ch)<0)
|
||||
else if (!Manipulate.isEndOfSyllableMark(ch))
|
||||
break;
|
||||
|
||||
init++;
|
||||
|
@ -350,12 +343,12 @@ outAHere:
|
|||
while (fin < linea.length())
|
||||
{
|
||||
ch = linea.charAt(fin);
|
||||
if (endOfParagraphMarks.indexOf(ch)>=0)
|
||||
if (Manipulate.isPunctuationMark(ch))
|
||||
{
|
||||
doNotFinishUp = false;
|
||||
break;
|
||||
}
|
||||
else if (endOfSyllableMarks.indexOf(ch)>=0)
|
||||
else if (Manipulate.isEndOfSyllableMark(ch))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -28,6 +28,11 @@ import org.thdl.util.*;
|
|||
public class Manipulate
|
||||
{
|
||||
|
||||
private static String endOfParagraphMarks = "/;|!:^@#$%=";
|
||||
private static String bracketMarks = "<>(){}[]";
|
||||
private static String endOfSyllableMarks = " _\t";
|
||||
private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks;
|
||||
|
||||
/* public static String[] parseFields (String s, char delimiter)
|
||||
{
|
||||
int pos;
|
||||
|
@ -43,7 +48,52 @@ public class Manipulate
|
|||
|
||||
ll.addLast(s.trim());
|
||||
return ll.toStringArray();
|
||||
}*/
|
||||
}*/
|
||||
|
||||
/**
 * Finds the first position in str holding a character that also occurs in chars.
 * The string is scanned from index 0 upward, so the lowest matching index wins.
 *
 * @param str the string to search
 * @param chars the set of characters to look for, given as a string
 * @return the index of the first character of str found in chars, or -1 when
 *         no character matches (including when str is empty)
 */
public static int indexOfAnyChar(String str, String chars)
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<str.length(); i++)
|
||||
{
|
||||
if (chars.indexOf(str.charAt(i))>=0)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
 * Finds the first character of word that is any stop marker, i.e. a member of
 * allStopMarkers (the end-of-syllable, end-of-paragraph and bracket marks combined).
 *
 * @param word the string to search
 * @return the index of the first stop marker in word, or -1 when there is none
 */
public static int indexOfExtendedEndOfSyllableMark(String word)
|
||||
{
|
||||
return indexOfAnyChar(word, allStopMarkers);
|
||||
}
|
||||
|
||||
/**
 * Finds the first character of word that is listed in bracketMarks.
 *
 * @param word the string to search
 * @return the index of the first bracket mark in word, or -1 when there is none
 */
public static int indexOfBracketMarks(String word)
|
||||
{
|
||||
return indexOfAnyChar(word, bracketMarks);
|
||||
}
|
||||
|
||||
/**
 * Tells whether a character counts as punctuation, meaning it is either an
 * end-of-paragraph mark or a bracket mark.
 *
 * @param ch the character (as an int code) to test
 * @return true when ch occurs in endOfParagraphMarks or in bracketMarks
 */
public static boolean isPunctuationMark(int ch)
|
||||
{
|
||||
return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a character is listed in endOfParagraphMarks.
 * Bracket marks are kept in a separate string and are not matched here.
 *
 * @param ch the character (as an int code) to test
 * @return true when ch occurs in endOfParagraphMarks, false otherwise
 */
public static boolean isEndOfParagraphMark(int ch)
|
||||
{
|
||||
return endOfParagraphMarks.indexOf(ch)>=0;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a character is listed in endOfSyllableMarks
 * (space, underscore or tab).
 *
 * @param ch the character (as an int code) to test
 * @return true when ch occurs in endOfSyllableMarks, false otherwise
 */
public static boolean isEndOfSyllableMark(int ch)
|
||||
{
|
||||
return endOfSyllableMarks.indexOf(ch)>=0;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a string contains at least one letter or digit, as judged by
 * Character.isLetterOrDigit. Strings made only of punctuation or whitespace,
 * and the empty string, are reported as not meaningful.
 *
 * @param s the string to inspect
 * @return true as soon as a letter or digit is found, false when none exists
 */
public static boolean isMeaningful(String s)
|
||||
{
|
||||
for (int i=0; i<s.length(); i++)
|
||||
if (Character.isLetterOrDigit(s.charAt(i))) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public static String replace(String linea, String origSub, String newSub)
|
||||
{
|
||||
|
@ -89,7 +139,6 @@ public class Manipulate
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
public static String deleteSubstring (String string, String sub)
|
||||
{
|
||||
int pos = string.indexOf(sub), posEnd = pos + sub.length();
|
||||
|
@ -251,6 +300,7 @@ public class Manipulate
|
|||
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
|
||||
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
|
||||
nuevaLinea = replace(nuevaLinea, ":", "H");
|
||||
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
|
||||
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
|
||||
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
|
||||
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
|
||||
|
@ -294,19 +344,20 @@ public class Manipulate
|
|||
{
|
||||
chP = nuevaLinea.charAt(i-1);
|
||||
chN = nuevaLinea.charAt(i+1);
|
||||
if (Character.isLetter(chP) && !isVowel(chP) && isVowel(chN))
|
||||
if (isVowel(chN))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||
len--;
|
||||
if (Character.isLetter(chP) && !isVowel(chP))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||
len--;
|
||||
}
|
||||
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
|
||||
len-=2;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'a':
|
||||
if ((i<len-3 && nuevaLinea.charAt(i+1)=='\'' && isVowel(nuevaLinea.charAt(i+2))) && (i==0 || !Character.isLetter(nuevaLinea.charAt(i-1))))
|
||||
{
|
||||
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(nuevaLinea.charAt(i+2)) + nuevaLinea.substring(i+3);
|
||||
len-=2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -416,14 +467,24 @@ public class Manipulate
|
|||
|
||||
public static String deleteQuotes(String s)
|
||||
{
|
||||
int length = s.length();
|
||||
int length = s.length(), pos;
|
||||
if (length>2)
|
||||
{
|
||||
if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"'))
|
||||
return s.substring(1,length-1);
|
||||
if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"'))
|
||||
return s.substring(1,length-1);
|
||||
|
||||
do
|
||||
{
|
||||
pos = s.indexOf("\"\"");
|
||||
if (pos<0) break;
|
||||
s = Manipulate.deleteSubstring(s, pos, pos+1);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries
|
||||
|
||||
|
|
Loading…
Reference in a new issue