Fixed importation errors for the translation tool.

This commit is contained in:
amontano 2005-02-21 05:10:00 +00:00
parent 4b4787411b
commit 686756116f
4 changed files with 199 additions and 118 deletions

View file

@ -80,10 +80,6 @@ class AcipToTab
if (temp.length()>0)
{
currentPage=Integer.parseInt(temp);
if (currentPage==3141)
{
System.out.println("Hello!");
}
}
if (marker<len)
{
@ -151,7 +147,7 @@ class AcipToTab
while (marker<len)
{
ch = entrada.charAt(marker);
if (ch==' ' || ch=='/') break;
if (Manipulate.isEndOfSyllableMark(ch) || Manipulate.isEndOfParagraphMark(ch)) break;
marker++;
}
@ -184,50 +180,56 @@ class AcipToTab
while (marker < len)
{
ch = entrada.charAt(marker);
switch(ch)
{
case '/':
if (Manipulate.isEndOfParagraphMark(ch))
{
markerNotFound=false;
marker2=marker+1;
}
else if (Manipulate.isEndOfSyllableMark(ch))
{
if (marker+1<len && Manipulate.isEndOfSyllableMark(entrada.charAt(marker+1))) // verify " "
{
markerNotFound=false;
marker2=marker+1;
break;
case '(': case '<':
markerNotFound=false;
marker2=marker;
break;
case 'g': // verify "g "
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && entrada.charAt(marker+1)==' ')
{
temp = entrada.substring(0, marker+1);
if (!lastWeirdDefiniendum.startsWith(temp))
marker2=++marker;
}
}
else
{
switch(ch)
{
case '(': case '<':
markerNotFound=false;
marker2=marker;
break;
case 'g': // verify "g "
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && Manipulate.isEndOfSyllableMark(entrada.charAt(marker+1)))
{
temp = entrada.substring(0, marker+1);
if (!lastWeirdDefiniendum.startsWith(temp))
{
markerNotFound=false;
marker2=++marker;
lastWeirdDefiniendum=temp;
//n++;
// out.println(currentPage + ": " + entrada);
}
}
break;
case '.':
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
{
markerNotFound=false;
marker2=++marker;
lastWeirdDefiniendum=temp;
//n++;
// out.println(currentPage + ": " + entrada);
}
}
break;
case ' ': // verify " "
if (marker+1<len && entrada.charAt(marker+1)==' ')
{
markerNotFound=false;
marker2=++marker;
}
break;
case '.':
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
{
markerNotFound=false;
marker2=marker;
}
break;
default:
if (Character.isDigit(ch))
{
markerNotFound=false;
marker2=marker;
}
marker2=marker;
}
break;
default:
if (Character.isDigit(ch))
{
markerNotFound=false;
marker2=marker;
}
}
}
if (markerNotFound) marker++;

View file

@ -182,7 +182,14 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
private BinaryFileGenerator(String sil, String def, int numDef)
{
super();
int marker = sil.indexOf(" ");
int marker;
while (true)
{
marker = Manipulate.indexOfExtendedEndOfSyllableMark(sil);
if (marker==0) sil = sil.substring(1);
else if (marker==sil.length()-1) sil = sil.substring(0,sil.length()-1);
else break;
}
// fix for updates
this.sourceDef = new ByteDictionarySource();
@ -310,7 +317,7 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
while (marker<len)
{
ch = entrada.charAt(marker);
if (ch==' ' || ch=='/') break;
if (Manipulate.isEndOfSyllableMark(ch) || Manipulate.isEndOfParagraphMark(ch)) break;
marker++;
}
@ -333,48 +340,54 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
while (marker < len)
{
ch = entrada.charAt(marker);
switch(ch)
{
case '/':
if (Manipulate.isEndOfParagraphMark(ch))
{
markerNotFound=false;
marker2=marker+1;
}
else if (Manipulate.isEndOfSyllableMark(ch))
{
if (marker+1<len && Manipulate.isEndOfSyllableMark(entrada.charAt(marker+1))) // verify " "
{
markerNotFound=false;
marker2=marker+1;
break;
case '(': case '<':
markerNotFound=false;
marker2=marker;
break;
case 'g': // verify "g "
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && entrada.charAt(marker+1)==' ')
{
temp = entrada.substring(0, marker+1);
if (!lastWeirdDefiniendum.startsWith(temp))
{
marker2=++marker;
}
}
else
{
switch(ch)
{
case '(': case '<':
markerNotFound=false;
marker2=marker;
break;
case 'g': // verify "g "
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && Manipulate.isEndOfSyllableMark(entrada.charAt(marker+1)))
{
temp = entrada.substring(0, marker+1);
if (!lastWeirdDefiniendum.startsWith(temp))
{
markerNotFound=false;
marker2=++marker;
lastWeirdDefiniendum=temp;
}
}
break;
case '.':
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
{
markerNotFound=false;
marker2=++marker;
lastWeirdDefiniendum=temp;
}
}
break;
case ' ': // verify " "
if (marker+1<len && entrada.charAt(marker+1)==' ')
{
markerNotFound=false;
marker2=++marker;
}
break;
case '.':
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
{
markerNotFound=false;
marker2=marker;
}
break;
default:
if (Character.isDigit(ch))
{
markerNotFound=false;
marker2=marker;
}
marker2=marker;
}
break;
default:
if (Character.isDigit(ch))
{
markerNotFound=false;
marker2=marker;
}
}
}
if (markerNotFound) marker++;
else break;
@ -486,16 +499,20 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
default:
marker = entrada.indexOf(delimiter);
}
if (marker<0)
if (marker<=0)
{
System.out.println("Error loading line " + currentLine + ", in file " + archivo + ":");
System.out.println(entrada);
}
else
{
marker2 = Manipulate.indexOfBracketMarks(entrada.substring(0,marker));
if (marker2>0) marker = marker2;
s1 = Manipulate.deleteQuotes(entrada.substring(0,marker).trim());
s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length()).trim());
if (!s2.equals(""))
s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length())).trim();
if (Manipulate.isMeaningful(s2))
{
if (currentLine%5000==0)
{
@ -527,8 +544,16 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
Link link, newLink;
BinaryFileGenerator ultimo;
String firstSillable;
int marker = word.indexOf(" "), comp;
int marker, comp;
while (true)
{
marker = Manipulate.indexOfExtendedEndOfSyllableMark(word);
if (marker==0) word = word.substring(1);
else if (marker==word.length()-1) word = word.substring(0,word.length()-1);
else break;
}
if (marker<0)
firstSillable = word;
else firstSillable = word.substring(0,marker);

View file

@ -35,8 +35,6 @@ public class LocalTibetanScanner extends TibetanScanner
private SyllableListTree raiz, silActual, lastCompSil, silAnterior;
private String wordActual, lastCompWord;
private Vector floatingSil;
private static String endOfParagraphMarks = "/;|!:[]^@#$%=<>(){}";
private static String endOfSyllableMarks = " _\t";
static
{
@ -295,11 +293,6 @@ public class LocalTibetanScanner extends TibetanScanner
}
}
/**
 * Tells whether <code>ch</code> is one of the characters listed in
 * <code>endOfSyllableMarks</code>.
 *
 * @param ch character code to look up
 * @return true when the character occurs in <code>endOfSyllableMarks</code>
 */
private boolean isEndOfSyllable(int ch)
{
    final int where = endOfSyllableMarks.indexOf(ch);
    return where >= 0;
}
public void scanLine(String linea)
{
int init = 0, fin;
@ -325,7 +318,7 @@ outAHere:
if (init>=linea.length())
break outAHere;
ch = linea.charAt(init);
if (endOfParagraphMarks.indexOf(ch)>=0)
if (Manipulate.isPunctuationMark(ch))
{
if (doNotFinishUp)
{
@ -334,7 +327,7 @@ outAHere:
}
wordList.addLast(new PunctuationMark(ch));
}
else if (endOfSyllableMarks.indexOf(ch)<0)
else if (!Manipulate.isEndOfSyllableMark(ch))
break;
init++;
@ -350,12 +343,12 @@ outAHere:
while (fin < linea.length())
{
ch = linea.charAt(fin);
if (endOfParagraphMarks.indexOf(ch)>=0)
if (Manipulate.isPunctuationMark(ch))
{
doNotFinishUp = false;
break;
}
else if (endOfSyllableMarks.indexOf(ch)>=0)
else if (Manipulate.isEndOfSyllableMark(ch))
{
break;
}

View file

@ -28,6 +28,11 @@ import org.thdl.util.*;
public class Manipulate
{
// Characters tested by isEndOfParagraphMark() and, together with the brackets, by isPunctuationMark().
private static String endOfParagraphMarks = "/;|!:^@#$%=";
// Bracket characters; searched for by indexOfBracketMarks() and also accepted by isPunctuationMark().
private static String bracketMarks = "<>(){}[]";
// Space, underscore and tab: the characters isEndOfSyllableMark() accepts.
private static String endOfSyllableMarks = " _\t";
// Concatenation of the three sets above; the set indexOfExtendedEndOfSyllableMark() searches for.
private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks;
/* public static String[] parseFields (String s, char delimiter)
{
int pos;
@ -43,7 +48,52 @@ public class Manipulate
ll.addLast(s.trim());
return ll.toStringArray();
}*/
}*/
/**
 * Finds the first position in <code>str</code> whose character also
 * occurs somewhere in <code>chars</code>.
 *
 * @param str   text to scan from left to right
 * @param chars set of characters to look for
 * @return index of the first matching character, or -1 when no
 *         character of <code>str</code> occurs in <code>chars</code>
 */
public static int indexOfAnyChar(String str, String chars)
{
    final int total = str.length();
    int pos = 0;
    while (pos < total)
    {
        if (chars.indexOf(str.charAt(pos)) != -1)
        {
            return pos;
        }
        pos++;
    }
    return -1;
}
/**
 * Locates the first character of <code>word</code> that belongs to
 * <code>allStopMarkers</code> (syllable marks, paragraph marks and
 * brackets), delegating the search to <code>indexOfAnyChar</code>.
 *
 * @param word text to scan
 * @return the value reported by <code>indexOfAnyChar</code>: the index
 *         of the first stop marker, or -1 when there is none
 */
public static int indexOfExtendedEndOfSyllableMark(String word)
{
    final int firstStop = indexOfAnyChar(word, allStopMarkers);
    return firstStop;
}
/**
 * Locates the first character of <code>word</code> that belongs to
 * <code>bracketMarks</code>, delegating the search to
 * <code>indexOfAnyChar</code>.
 *
 * @param word text to scan
 * @return the value reported by <code>indexOfAnyChar</code>: the index
 *         of the first bracket character, or -1 when there is none
 */
public static int indexOfBracketMarks(String word)
{
    final int firstBracket = indexOfAnyChar(word, bracketMarks);
    return firstBracket;
}
/**
 * Tells whether <code>ch</code> is listed in either
 * <code>endOfParagraphMarks</code> or <code>bracketMarks</code>.
 *
 * @param ch character code to classify
 * @return true when the character occurs in one of the two sets
 */
public static boolean isPunctuationMark(int ch)
{
    // Paragraph marks are checked first; brackets only if that fails.
    if (endOfParagraphMarks.indexOf(ch) >= 0)
    {
        return true;
    }
    return bracketMarks.indexOf(ch) >= 0;
}
/**
 * Tells whether <code>ch</code> is listed in
 * <code>endOfParagraphMarks</code>.
 *
 * @param ch character code to classify
 * @return true when the character occurs in that set
 */
public static boolean isEndOfParagraphMark(int ch)
{
    final int where = endOfParagraphMarks.indexOf(ch);
    return where != -1;
}
/**
 * Tells whether <code>ch</code> is listed in
 * <code>endOfSyllableMarks</code>.
 *
 * @param ch character code to classify
 * @return true when the character occurs in that set
 */
public static boolean isEndOfSyllableMark(int ch)
{
    final int where = endOfSyllableMarks.indexOf(ch);
    return where != -1;
}
/**
 * Tells whether <code>s</code> holds at least one character for which
 * <code>Character.isLetterOrDigit</code> is true.
 *
 * @param s text to inspect
 * @return true as soon as a letter or digit is found; false for an
 *         empty string or one made only of other characters
 */
public static boolean isMeaningful(String s)
{
    final int end = s.length();
    int idx = 0;
    while (idx < end)
    {
        if (Character.isLetterOrDigit(s.charAt(idx)))
        {
            return true;
        }
        idx++;
    }
    return false;
}
public static String replace(String linea, String origSub, String newSub)
{
@ -89,7 +139,6 @@ public class Manipulate
}
}
public static String deleteSubstring (String string, String sub)
{
int pos = string.indexOf(sub), posEnd = pos + sub.length();
@ -251,6 +300,7 @@ public class Manipulate
nuevaLinea = replace(nuevaLinea, "kaSH", "k+Sh");
nuevaLinea = replace(nuevaLinea, "SH", "Sh");
nuevaLinea = replace(nuevaLinea, ":", "H");
nuevaLinea = replace(nuevaLinea, "NH", "NaH");
nuevaLinea = replace(nuevaLinea, "dh", "d+h");
nuevaLinea = replace(nuevaLinea, "gh", "g+h");
nuevaLinea = replace(nuevaLinea, "bh", "b+h");
@ -294,19 +344,20 @@ public class Manipulate
{
chP = nuevaLinea.charAt(i-1);
chN = nuevaLinea.charAt(i+1);
if (Character.isLetter(chP) && !isVowel(chP) && isVowel(chN))
if (isVowel(chN))
{
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len--;
if (Character.isLetter(chP) && !isVowel(chP))
{
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len--;
}
else if (chP=='a' && (i==1 || i>1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2)))))
{
nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2);
len-=2;
}
}
}
break;
case 'a':
if ((i<len-3 && nuevaLinea.charAt(i+1)=='\'' && isVowel(nuevaLinea.charAt(i+2))) && (i==0 || !Character.isLetter(nuevaLinea.charAt(i-1))))
{
nuevaLinea = nuevaLinea.substring(0, i) + Character.toUpperCase(nuevaLinea.charAt(i+2)) + nuevaLinea.substring(i+3);
len-=2;
}
}
}
@ -416,14 +467,24 @@ public class Manipulate
/**
 * Removes double-quote decoration from <code>s</code>.
 * Strings of length 2 or less are returned unchanged. For longer strings,
 * when both the first and the last character are a double quote, the text
 * between them is returned. Otherwise each pair of adjacent double quotes
 * found by <code>indexOf</code> is handed to
 * <code>Manipulate.deleteSubstring(s, pos, pos+1)</code> until no pair is
 * left, and the resulting string is returned.
 *
 * NOTE(review): the three-argument deleteSubstring is not visible here;
 * presumably it drops one of the two quotes -- confirm.
 * NOTE(review): this listing shows removed and added lines of the change
 * together, so the declaration of <code>length</code> and the
 * enclosing-quote test each appear twice.
 *
 * @param s text that may carry enclosing or doubled double quotes
 * @return the text with that quoting removed as described above
 */
public static String deleteQuotes(String s)
{
int length = s.length();
int length = s.length(), pos;
// Only strings longer than two characters are examined at all.
if (length>2)
{
// Enclosed in quotes: return the inner text.
if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"'))
return s.substring(1,length-1);
if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"'))
return s.substring(1,length-1);
// Not enclosed: keep processing doubled quotes until none is found.
do
{
pos = s.indexOf("\"\"");
// No doubled quote left, stop.
if (pos<0) break;
s = Manipulate.deleteSubstring(s, pos, pos+1);
} while (true);
}
return s;
}
/** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries