Fixed the importing of dictionaries that use '-' as the separator between definiendum and definition, so that this character is no longer confused with the dash that marks a reverse vowel in Tibetanized Sanskrit.

This commit is contained in:
amontano 2002-11-27 23:30:44 +00:00
parent c13adf9d14
commit c12088ce5d

View file

@ -36,6 +36,10 @@ public class BinaryFileGenerator extends LinkedList
private long posHijos; private long posHijos;
private String sil, def[]; private String sil, def[];
private static String delimiter; private static String delimiter;
private static int delimiterType;
private final static int delimiterGeneric=0;
private final static int delimiterAcip=1;
private final static int delimiterDash=2;
/** Number of dictionary. If 0, partial word (no definition). /** Number of dictionary. If 0, partial word (no definition).
*/ */
@ -47,7 +51,8 @@ public class BinaryFileGenerator extends LinkedList
{ {
wordRaf = null; wordRaf = null;
defRaf = null; defRaf = null;
delimiter = " - "; delimiter = null;
delimiterType=delimiterDash;
} }
public BinaryFileGenerator() public BinaryFileGenerator()
@ -110,186 +115,212 @@ public class BinaryFileGenerator extends LinkedList
boolean markerNotFound; boolean markerNotFound;
// used for acip dict // used for acip dict
if (delimiter==null) switch(delimiterType)
{ {
outAHere: case delimiterAcip:
while (true) outAHere:
{ while (true)
entrada=br.readLine();
if (entrada==null) break;
currentLine++;
entrada = entrada.trim();
len = entrada.length();
if (len<=0) continue;
// get page number
if (entrada.charAt(0)=='@')
{
marker = 1;
while(marker<len && Character.isDigit(entrada.charAt(marker)))
marker++;
temp = entrada.substring(1, marker);
if (temp.length()>0)
currentPage=Integer.parseInt(temp);
if (marker<len)
{
entrada = entrada.substring(marker).trim();
len = entrada.length();
}
else continue;
}
// get current letter
if (entrada.charAt(0)=='(' || entrada.charAt(0)=='{' || entrada.charAt(0)=='?')
{
currentLetter = entrada.substring(1, entrada.length()-2);
/*out.println(currentPage + ": " + currentLetter);
n++;*/
continue;
}
if (entrada.charAt(0)=='[')
{
marker=1;
markerNotFound=true;
do
{
while (marker<len && markerNotFound)
{
if (entrada.charAt(marker)==']') markerNotFound=false;
else marker++;
}
if (markerNotFound)
{
entrada=br.readLine();
if (entrada==null) break outAHere;
currentLine++;
len = entrada.length();
marker=0;
}
else break;
} while (true);
if (marker<len)
{
entrada = entrada.substring(marker+1).trim();
len = entrada.length();
if (len<=0) continue;
}
else continue;
}
// skip stuff. Add to previous definition.
if (entrada.startsWith("..."))
{ {
entrada=entrada.substring(3); entrada=br.readLine();
if (entrada==null) break;
currentLine++;
entrada = entrada.trim();
len = entrada.length(); len = entrada.length();
if (len<=0) continue; if (len<=0) continue;
}
// find definiendum // get page number
ch = entrada.charAt(0); if (entrada.charAt(0)=='@')
if (Character.isLetter(ch) || ch=='\'') {
{ marker = 1;
/* first criteria: if it is not the root letter of section it is part of the while(marker<len && Character.isDigit(entrada.charAt(marker)))
previous definition, probably a page change, else go for it with following marker++;
code: */ temp = entrada.substring(1, marker);
if (temp.length()>0)
currentPage=Integer.parseInt(temp);
if (marker<len)
{
entrada = entrada.substring(marker).trim();
len = entrada.length();
}
else continue;
}
// get first syllable to check base letter // get current letter
marker=1; if (entrada.charAt(0)=='(' || entrada.charAt(0)=='{' || entrada.charAt(0)=='?')
while (marker<len) {
currentLetter = entrada.substring(1, entrada.length()-2);
/*out.println(currentPage + ": " + currentLetter);
n++;*/
continue;
}
if (entrada.charAt(0)=='[')
{
marker=1;
markerNotFound=true;
do
{
while (marker<len && markerNotFound)
{
if (entrada.charAt(marker)==']') markerNotFound=false;
else marker++;
}
if (markerNotFound)
{
entrada=br.readLine();
if (entrada==null) break outAHere;
currentLine++;
len = entrada.length();
marker=0;
}
else break;
} while (true);
if (marker<len)
{
entrada = entrada.substring(marker+1).trim();
len = entrada.length();
if (len<=0) continue;
}
else continue;
}
// skip stuff. Add to previous definition.
if (entrada.startsWith("..."))
{
entrada=entrada.substring(3);
len = entrada.length();
if (len<=0) continue;
}
// find definiendum
ch = entrada.charAt(0);
if (Character.isLetter(ch) || ch=='\'')
{ {
ch = entrada.charAt(marker); /* first criteria: if it is not the root letter of section it is part of the
if (ch==' ' || ch=='/') break; previous definition, probably a page change, else go for it with following
marker++; code: */
}
if (status!=halfDefiniendum) temp = Manipulate.getBaseLetter(entrada.substring(0, marker)); // get first syllable to check base letter
marker=1;
while (marker<len)
{
ch = entrada.charAt(marker);
if (ch==' ' || ch=='/') break;
marker++;
}
// if line begins with current letter, probably it is a definiendum if (status!=halfDefiniendum) temp = Manipulate.getBaseLetter(entrada.substring(0, marker));
if (status==halfDefiniendum || currentLetter.equals(temp))
{ // if line begins with current letter, probably it is a definiendum
/* Since new definiendum was found, update last and collect new. No need to update if (status==halfDefiniendum || currentLetter.equals(temp))
status because it will be updated below. */
if (status==definition)
{ {
add(s1, s2, defNum); /* Since new definiendum was found, update last and collect new. No need to update
s1=""; s2=""; status because it will be updated below. */
} if (status==definition)
marker=marker2=1;
markerNotFound=true;
while (marker < len)
{
ch = entrada.charAt(marker);
switch(ch)
{ {
case '/': add(s1, s2, defNum);
markerNotFound=false; s1=""; s2="";
marker2=marker+1; }
break;
case '(': case '<': marker=marker2=1;
markerNotFound=false; markerNotFound=true;
marker2=marker;
break; while (marker < len)
case 'g': // verify "g " {
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && entrada.charAt(marker+1)==' ') ch = entrada.charAt(marker);
{ switch(ch)
temp = entrada.substring(0, marker+1); {
if (!lastWeirdDefiniendum.startsWith(temp)) case '/':
markerNotFound=false;
marker2=marker+1;
break;
case '(': case '<':
markerNotFound=false;
marker2=marker;
break;
case 'g': // verify "g "
if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && entrada.charAt(marker+1)==' ')
{
temp = entrada.substring(0, marker+1);
if (!lastWeirdDefiniendum.startsWith(temp))
{
markerNotFound=false;
marker2=++marker;
lastWeirdDefiniendum=temp;
}
}
break;
case ' ': // verify " "
if (marker+1<len && entrada.charAt(marker+1)==' ')
{ {
markerNotFound=false; markerNotFound=false;
marker2=++marker; marker2=++marker;
lastWeirdDefiniendum=temp; }
} break;
} case '.':
break; if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
case ' ': // verify " " {
if (marker+1<len && entrada.charAt(marker+1)==' ') markerNotFound=false;
{ marker2=marker;
markerNotFound=false; }
marker2=++marker; break;
} default:
break; if (Character.isDigit(ch))
case '.': {
if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.') markerNotFound=false;
{ marker2=marker;
markerNotFound=false; }
marker2=marker; }
} if (markerNotFound) marker++;
break; else break;
default:
if (Character.isDigit(ch))
{
markerNotFound=false;
marker2=marker;
}
} }
if (markerNotFound) marker++;
else break;
}
/* either this is a definiendum that consists of several lines or /* either this is a definiendum that consists of several lines or
it is part of the last definition. */ it is part of the last definition. */
if (markerNotFound) if (markerNotFound)
{ {
/* assume that the definiendum goes on to the next line. */ /* assume that the definiendum goes on to the next line. */
s1 = s1 + entrada + " "; s1 = s1 + entrada + " ";
status=halfDefiniendum; status=halfDefiniendum;
} }
else else
{ {
s1 = s1 + entrada.substring(0,marker).trim(); s1 = s1 + entrada.substring(0,marker).trim();
s2 = "[" + currentPage + "] " + entrada.substring(marker2).trim(); s2 = "[" + currentPage + "] " + entrada.substring(marker2).trim();
status=definition; status=definition;
while (true)
{
entrada=br.readLine();
if (entrada==null)
{
add(s1, s2, defNum);
break outAHere;
}
currentLine++;
entrada = entrada.trim();
if (entrada.equals("")) break;
else
{
s2 = s2 + " " + entrada;
}
}
}
}
else // last line did not start with the current letter, it must still be part of the definition
{
s2 = s2 + " " + entrada;
while (true) while (true)
{ {
entrada=br.readLine(); entrada=br.readLine();
if (entrada==null) if (entrada==null)
{ {
add(s1, s2, defNum); add(s1, s2, defNum);
break outAHere; break outAHere;
} }
@ -297,86 +328,75 @@ public class BinaryFileGenerator extends LinkedList
currentLine++; currentLine++;
entrada = entrada.trim(); entrada = entrada.trim();
if (entrada.equals("")) break; if (entrada.equals("")) break;
else else
{ {
s2 = s2 + " " + entrada; s2 = s2 + " " + entrada;
} }
} }
}
}
} }
else // last line did not start with the current letter, it must still be part of the definition else // if first character was not a letter, it must still be part of definition
{ {
s2 = s2 + " " + entrada; s2 = s2 + " " + entrada;
while (true) while (true)
{ {
entrada=br.readLine(); entrada=br.readLine();
if (entrada==null) if (entrada==null)
{ {
add(s1, s2, defNum); add(s1, s2, defNum);
break outAHere; break outAHere;
} }
currentLine++; currentLine++;
entrada = entrada.trim(); entrada = entrada.trim();
if (entrada.equals("")) break; if (entrada.equals("")) break;
else else
{ {
s2 = s2 + " " + entrada; s2 = s2 + " " + entrada;
} }
} }
} }
} }
else // if first character was not a letter, it must still be part of definition break;
{ default:
s2 = s2 + " " + entrada; while ((entrada = br.readLine())!=null)
while (true) {
{ entrada = entrada.trim();
entrada=br.readLine(); if (!entrada.equals(""))
{
if (entrada==null) switch(delimiterType)
{
/* this is needed to make sure that the dash used in reverse vowels with extended
wylie is not confused with the dash that separates definiendum and definition. */
case delimiterDash:
marker=entrada.indexOf('-');
len = entrada.length();
while (marker>=0 && marker<len-1 && Manipulate.isVowel(entrada.charAt(marker+1)) && !Character.isWhitespace(entrada.charAt(marker-1)))
{
marker = entrada.indexOf('-', marker+1);
}
break;
default:
marker = entrada.indexOf(delimiter);
}
if (marker<0)
{ {
add(s1, s2, defNum); System.out.println("Error loading line " + currentLine + ", in file " + archivo + ":");
break outAHere; System.out.println(entrada);
} }
else
currentLine++; {
entrada = entrada.trim(); s1 = deleteQuotes(entrada.substring(0,marker).trim());
s2 = deleteQuotes(entrada.substring(marker+1).trim());
if (entrada.equals("")) break; add(s1, s2 , defNum);
else }
{ }
s2 = s2 + " " + entrada; }
} currentLine++;
}
}
}
} }
else
while ((entrada = br.readLine())!=null)
{
entrada = entrada.trim();
if (!entrada.equals(""))
{
marker = entrada.indexOf(delimiter);
if (marker<0)
{
System.out.println("Error loading line " + currentLine + ", in file " + archivo + ":");
System.out.println(entrada);
}
else
{
s1 = deleteQuotes(entrada.substring(0,marker).trim());
s2 = deleteQuotes(entrada.substring(marker+1).trim());
add(s1, s2 , defNum);
}
}
currentLine++;
}
} }
@ -547,11 +567,17 @@ public class BinaryFileGenerator extends LinkedList
if (args[0].charAt(0)=='-') if (args[0].charAt(0)=='-')
{ {
if (args[0].equals("-tab")) if (args[0].equals("-tab"))
{
delimiterType = delimiterGeneric;
delimiter="\t"; delimiter="\t";
}
else if (args[0].equals("-acip")) else if (args[0].equals("-acip"))
delimiter=null; delimiterType=delimiterAcip;
else else
{
delimiterType=delimiterGeneric;
delimiter=args[0].substring(1); delimiter=args[0].substring(1);
}
if (args.length>2) if (args.length>2)
{ {
printSintax(); printSintax();
@ -576,14 +602,23 @@ public class BinaryFileGenerator extends LinkedList
if (args[i].charAt(0)=='-') if (args[i].charAt(0)=='-')
{ {
if (args[i].equals("-tab")) if (args[i].equals("-tab"))
delimiter="\t"; {
delimiterType=delimiterGeneric;
delimiter="\t";
}
else if (args[i].equals("-acip")) else if (args[i].equals("-acip"))
delimiter=null; delimiterType=delimiterAcip;
else else
{
delimiterType=delimiterGeneric;
delimiter=args[i].substring(1); delimiter=args[i].substring(1);
}
i++; i++;
} }
else delimiter=" -"; else
{
delimiterType=delimiterDash;
}
sl.addFile(args[i] + ".txt", n); sl.addFile(args[i] + ".txt", n);
n++; i++; n++; i++;
} }