Updated the dictionary structure to allow grouping of dictionaries. This is the first step toward cleaning up the massive repetitions in dictionaries.

This commit is contained in:
amontano 2004-08-13 04:47:35 +00:00
parent 6bb0646f1c
commit afd3a95a21
22 changed files with 1533 additions and 297 deletions

View file

@ -0,0 +1,343 @@
package org.thdl.tib.scanner;
import java.net.*;
import java.io.*;
/** Reads a dictionary transcription line by line and writes one
    "definiendum TAB definition" line per entry (the class name suggests
    the input is in ACIP format).

    The parser recognizes the following line types (after trimming):
    <ul>
    <li>lines starting with '@' followed by digits: a page number, optionally
        followed by more text on the same line;</li>
    <li>lines starting with '(', '{' or '?': a heading that sets the current
        root letter;</li>
    <li>text enclosed in '[' ... ']' (possibly spanning lines): skipped;</li>
    <li>lines whose first syllable has the current root letter: the start of a
        new entry;</li>
    <li>anything else: continuation of the current definition.</li>
    </ul>
*/
class AcipToTab
{
    private BufferedReader in;
    private PrintWriter out;
    private String currentDefiniendum, currentDefinition;

    /** Creates a converter reading from <code>in</code> and writing to <code>out</code>. */
    public AcipToTab(BufferedReader in, PrintWriter out)
    {
        this.in = in;
        this.out = out;
    }

    /** Writes the entry collected so far as a single tab-delimited line. */
    public void add()
    {
        out.println(currentDefiniendum + '\t' + currentDefinition);
    }

    /** Command line entry point.
        With no arguments, converts standard input to standard output.
        With one argument, reads from that file (or URL if the argument contains
        "http://") and writes to standard output. With two or more, the second
        argument is the output file.
    */
    public static void main (String[] args) throws Exception
    {
        PrintWriter out=null;
        BufferedReader in=null;
        boolean file=false;

        try
        {
            switch (args.length)
            {
                case 0: out = new PrintWriter(System.out);
                    in = new BufferedReader(new InputStreamReader(System.in));
                    break;
                case 1: out = new PrintWriter(System.out);
                    file = true;
                    break;
                default: out = new PrintWriter(new FileOutputStream(args[1]));
                    file = true;
            }

            if (file)
            {
                if (args[0].indexOf("http://") >= 0)
                    in = new BufferedReader(new InputStreamReader(new BufferedInputStream((new URL(args[0])).openStream())));
                else
                    in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0])));
            }

            new AcipToTab(in, out).run();
        }
        finally
        {
            // Release both streams even if the conversion fails half way.
            try
            {
                if (in!=null) in.close();
            }
            finally
            {
                if (out!=null) out.close();
            }
        }
    }

    /** Appends the following input lines to the current definition until a
        blank line is found.
        @return true if a blank line ended the paragraph, false if the end of
        the input was reached first.
    */
    private boolean appendContinuationLines() throws IOException
    {
        String line;

        while ((line = in.readLine()) != null)
        {
            line = line.trim();
            if (line.equals("")) return true;
            currentDefinition = currentDefinition + " " + line;
        }
        return false;
    }

    /** Converts the whole input, writing every entry found to the output and
        flushing the output at the end.
    */
    public void run() throws Exception
    {
        final short newDefiniendum=1, halfDefiniendum=2, definition=3;
        short status=newDefiniendum;
        int marker, len, marker2, currentPage=0;
        char ch;
        String entrada, currentLetter="", pageDigits, baseLetter="", prefix, lastWeirdDefiniendum="";
        boolean markerNotFound;

        currentDefiniendum="";
        currentDefinition="";

        outAHere:
        while (true)
        {
            entrada=in.readLine();
            if (entrada==null)
            {
                /* The last entry is normally written when the next definiendum
                   shows up; at the end of the input there is none, so write
                   the pending entry here. */
                if (status==definition) add();
                break;
            }

            entrada = entrada.trim();
            len = entrada.length();
            if (len<=0) continue;

            // get page number
            if (entrada.charAt(0)=='@')
            {
                marker = 1;
                while(marker<len && Character.isDigit(entrada.charAt(marker)))
                    marker++;

                pageDigits = entrada.substring(1, marker);
                if (pageDigits.length()>0)
                    currentPage=Integer.parseInt(pageDigits);

                if (marker<len)
                {
                    entrada = entrada.substring(marker).trim();
                    len = entrada.length();
                }
                else continue;
            }

            // get current letter
            if (entrada.charAt(0)=='(' || entrada.charAt(0)=='{' || entrada.charAt(0)=='?')
            {
                /* The letter is what lies between the first character and the
                   last two. Headings too short to hold one are ignored
                   instead of crashing on an invalid substring range. */
                if (len>=3) currentLetter = entrada.substring(1, len-2);
                continue;
            }

            // skip bracketed text, which may span several lines
            if (entrada.charAt(0)=='[')
            {
                marker=1;
                markerNotFound=true;
                do
                {
                    while (marker<len && markerNotFound)
                    {
                        if (entrada.charAt(marker)==']') markerNotFound=false;
                        else marker++;
                    }
                    if (markerNotFound)
                    {
                        entrada=in.readLine();
                        if (entrada==null)
                        {
                            // unterminated bracket at end of input: keep the pending entry
                            if (status==definition) add();
                            break outAHere;
                        }
                        len = entrada.length();
                        marker=0;
                    }
                    else break;
                } while (true);

                if (marker<len)
                {
                    entrada = entrada.substring(marker+1).trim();
                    len = entrada.length();
                    if (len<=0) continue;
                }
                else continue;
            }

            // skip leading ellipsis. The rest is handled as any other line.
            if (entrada.startsWith("..."))
            {
                entrada=entrada.substring(3);
                len = entrada.length();
                if (len<=0) continue;
            }

            // find definiendum
            ch = entrada.charAt(0);
            if (Character.isLetter(ch) || ch=='\'')
            {
                /* first criteria: if it is not the root letter of section it is part of the
                   previous definition, probably a page change, else go for it with following
                   code: */

                // get first syllable to check base letter
                marker=1;
                while (marker<len)
                {
                    ch = entrada.charAt(marker);
                    if (ch==' ' || ch=='/') break;
                    marker++;
                }
                if (status!=halfDefiniendum) baseLetter = Manipulate.getBaseLetter(entrada.substring(0, marker));

                // if line begins with current letter, probably it is a definiendum
                if (status==halfDefiniendum || currentLetter.equals(baseLetter))
                {
                    /* Since new definiendum was found, write the previous entry and start
                       collecting a new one. No need to update status because it will be
                       updated below. */
                    if (status==definition)
                    {
                        add();
                        currentDefiniendum="";
                        currentDefinition="";
                    }

                    /* Look for the boundary between definiendum and definition.
                       marker ends up at the end of the definiendum, marker2 at
                       the start of the definition. */
                    marker=marker2=1;
                    markerNotFound=true;

                    while (marker < len)
                    {
                        ch = entrada.charAt(marker);
                        switch(ch)
                        {
                            case '/':
                                markerNotFound=false;
                                marker2=marker+1;
                                break;
                            case '(': case '<':
                                markerNotFound=false;
                                marker2=marker;
                                break;
                            case 'g': // verify "g "
                                if (marker+1<len && Manipulate.isVowel(entrada.charAt(marker-1)) && entrada.charAt(marker+1)==' ')
                                {
                                    prefix = entrada.substring(0, marker+1);
                                    if (!lastWeirdDefiniendum.startsWith(prefix))
                                    {
                                        markerNotFound=false;
                                        marker2=++marker;
                                        lastWeirdDefiniendum=prefix;
                                    }
                                }
                                break;
                            case ' ': // verify "  " (two spaces)
                                if (marker+1<len && entrada.charAt(marker+1)==' ')
                                {
                                    markerNotFound=false;
                                    marker2=++marker;
                                }
                                break;
                            case '.':
                                if (marker+2<len && entrada.charAt(marker+1)=='.' && entrada.charAt(marker+2)=='.')
                                {
                                    markerNotFound=false;
                                    marker2=marker;
                                }
                                break;
                            default:
                                if (Character.isDigit(ch))
                                {
                                    markerNotFound=false;
                                    marker2=marker;
                                }
                        }
                        if (markerNotFound) marker++;
                        else break;
                    }

                    /* either this is a definiendum that consists of several lines or
                       it is part of the last definition. */
                    if (markerNotFound)
                    {
                        /* assume that the definiendum goes on to the next line, so keep
                           the text of this line as its first part. */
                        currentDefiniendum = currentDefiniendum + entrada.trim() + " ";
                        status=halfDefiniendum;
                    }
                    else
                    {
                        currentDefiniendum = currentDefiniendum + entrada.substring(0,marker).trim();
                        currentDefinition = "[" + currentPage + "] " + entrada.substring(marker2).trim();
                        status=definition;

                        if (!appendContinuationLines())
                        {
                            add();
                            break outAHere;
                        }
                    }
                }
                else // line did not start with the current letter, it must still be part of the definition
                {
                    currentDefinition = currentDefinition + " " + entrada;
                    if (!appendContinuationLines())
                    {
                        add();
                        break outAHere;
                    }
                }
            }
            else // if first character was not a letter, it must still be part of definition
            {
                currentDefinition = currentDefinition + " " + entrada;
                if (!appendContinuationLines())
                {
                    add();
                    break outAHere;
                }
            }
        }
        out.flush();
    }
}

View file

@ -73,13 +73,10 @@ public class AppletScannerFilter extends JApplet implements ActionListener, Focu
// sp = new SimpleScannerPanel(url); // sp = new SimpleScannerPanel(url);
sp = new DuffScannerPanel(url); sp = new DuffScannerPanel(url);
sp.addFocusListener(this); sp.addFocusListener(this);
setContentPane(sp); setContentPane(sp);
// setup the menu. Almost identical to WindowScannerFilter, but using swing. // setup the menu. Almost identical to WindowScannerFilter, but using swing.
JMenuBar mb = new JMenuBar(); JMenuBar mb = new JMenuBar();
mnuEdit = new JMenu ("Edit"); mnuEdit = new JMenu ("Edit");
mnuCut = new JMenuItem("Cut"); mnuCut = new JMenuItem("Cut");
@ -107,13 +104,14 @@ public class AppletScannerFilter extends JApplet implements ActionListener, Focu
mnuClear.addActionListener(this); mnuClear.addActionListener(this);
mb.add(mnuEdit); mb.add(mnuEdit);
JMenu m = new JMenu("View"); JMenu m;
/* m = new JMenu("View");
tibScript = new JCheckBoxMenuItem("Tibetan Script", true); tibScript = new JCheckBoxMenuItem("Tibetan Script", true);
m.add(tibScript); m.add(tibScript);
tibScript.addItemListener(this); tibScript.addItemListener(this);
mb.add(m); mb.add(m);*/
//JMenuItem
aboutItem = new JMenuItem("About..."); aboutItem = new JMenuItem("About...");
aboutItem.addActionListener(this); aboutItem.addActionListener(this);

View file

@ -150,6 +150,8 @@ myglossary_uma.txt</i> in the transliteration format explained above.<br>
*/ */
public class BinaryFileGenerator extends SimplifiedLinkedList public class BinaryFileGenerator extends SimplifiedLinkedList
{ {
private static final int versionNumber = 3;
private long posHijos; private long posHijos;
private String sil, def[]; private String sil, def[];
public final static int delimiterGeneric=0; public final static int delimiterGeneric=0;
@ -158,7 +160,7 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
/** Number of dictionary. If 0, partial word (no definition). /** Number of dictionary. If 0, partial word (no definition).
*/ */
private DictionarySource sourceDef; private ByteDictionarySource sourceDef;
public static RandomAccessFile wordRaf; public static RandomAccessFile wordRaf;
private static RandomAccessFile defRaf; private static RandomAccessFile defRaf;
@ -181,14 +183,16 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
{ {
super(); super();
int marker = sil.indexOf(" "); int marker = sil.indexOf(" ");
this.sourceDef = new DictionarySource();
// fix for updates
this.sourceDef = new ByteDictionarySource();
if (marker<0) if (marker<0)
{ {
this.sil = sil; this.sil = sil;
this.def = new String[1]; this.def = new String[1];
this.def[0] = def; this.def[0] = def;
this.sourceDef.add(numDef); this.sourceDef.addNewDef(numDef);
} }
else else
{ {
@ -208,9 +212,7 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
{ {
final short newDefiniendum=1, halfDefiniendum=2, definition=3; final short newDefiniendum=1, halfDefiniendum=2, definition=3;
short status=newDefiniendum; short status=newDefiniendum;
int marker, len, marker2; int marker, len, marker2, currentPage=0, currentLine=1;
// int n=0;
int currentPage=0, currentLine=1;
char ch; char ch;
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(archivo))); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(archivo)));
String entrada="", s1="", s2="", currentLetter="", temp="", lastWeirdDefiniendum="", alternateWords[]; String entrada="", s1="", s2="", currentLetter="", temp="", lastWeirdDefiniendum="", alternateWords[];
@ -495,6 +497,11 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length()).trim()); s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length()).trim());
if (!s2.equals("")) if (!s2.equals(""))
{ {
if (currentLine%5000==0)
{
System.out.println("Adding " + s1 + "...");
System.out.flush();
}
marker2 = s1.indexOf(';'); marker2 = s1.indexOf(';');
if (marker2>0) if (marker2>0)
{ {
@ -565,31 +572,233 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
} }
} }
/** Compares definition number n of this entry against every other definition
    and merges them where possible, keeping the array def and the dictionary
    bookkeeping in sourceDef in step (every deletion or insertion in def is
    paired with the matching call on sourceDef).

    Three situations are handled for each pair (i, n):
    1. Both strings are identical: their dictionary sets are joined and one of
       the two definitions is deleted.
    2. One string occurs inside the other with no letter immediately before or
       after it: the occurrences in the longer string are replaced through
       Manipulate.replace(..., "*") -- presumably substituting the range
       pos..posEnd with "*"; confirm against Manipulate -- and the dictionaries
       of the longer definition are added to the shorter one.
    3. Both definitions have equal dictionary sets: the texts are joined
       with ". " and one of the two definitions is deleted.

    @param n index in def of the definition to regroup. It is reassigned
    locally when that definition moves to a lower index.
*/
private void reGroup (int n)
{
int i, pos, posEnd;
for (i=0; i<def.length; i++)
{
if (i!=n)
{
// deal with repetitions of definitions
if (def[i].length()>=def[n].length())
{
// def[i] is at least as long as def[n]: look for def[n] inside def[i]
pos = def[i].indexOf(def[n]);
// if it is the same String exactly
if (pos==0 && def[i].length()==def[n].length())
{
if (i<n)
{
// keep the copy with the lower index (i) and give it n's dictionaries
sourceDef.addDictToDef(sourceDef.getDef(n), i);
def = Manipulate.deleteString(def, n);
sourceDef.deleteDef(n);
n = i;
continue;
}
else
{
// keep n, give it i's dictionaries and drop i
sourceDef.addDictToDef(sourceDef.getDef(i), n);
def = Manipulate.deleteString(def, i);
sourceDef.deleteDef(i);
// step back so the loop increment lands on the same index again
i--;
continue;
}
}
else
{
posEnd = pos + def[n].length();
// def[n] must be found (pos>=0) and must not be glued to letters on either side
if ((pos==0 || (pos>0 && !Character.isLetter(def[i].charAt(pos-1)))) && (posEnd==def[i].length() || !Character.isLetter(def[i].charAt(posEnd))))
{
// the dictionaries of n are already all in i: n adds nothing, delete it
if(sourceDef.getDef(i).contains(sourceDef.getDef(n)))
{
def = Manipulate.deleteString(def, n);
sourceDef.deleteDef(n);
return;
}
// else
sourceDef.addDictToDef(sourceDef.getDef(i), n);
// replace every stand-alone occurrence of def[n] inside def[i]
do
{
def[i] = Manipulate.replace(def[i], pos, posEnd, "*");
pos = def[i].indexOf(def[n]);
posEnd = pos + def[n].length();
} while ((pos==0 || (pos>0 && !Character.isLetter(def[i].charAt(pos-1)))) && (posEnd==def[i].length() || !Character.isLetter(def[i].charAt(posEnd))));
if (i<n)
{
// move definition n so that it sits at index i, ahead of the definition it was found in
def = Manipulate.addString(def, def[n], i);
def = Manipulate.deleteString(def, n+1);
sourceDef.insertDef(sourceDef.getDef(n), i);
sourceDef.deleteDef(n+1);
n = i;
// the modified definition is now at i+1; regroup it as well
reGroup(i+1);
}
else
{
// def[i] was modified; regroup it as well
reGroup(i);
}
}
}
}
else
{
// def[n] is longer than def[i]: look for def[i] inside def[n]
pos = def[n].indexOf(def[i]);
posEnd = pos + def[i].length();
// def[i] must be found (pos>=0) and must not be glued to letters on either side
if ((pos==0 || (pos>0 && !Character.isLetter(def[n].charAt(pos-1)))) && (posEnd==def[n].length() || !Character.isLetter(def[n].charAt(posEnd))))
{
// the dictionaries of i are already all in n: i adds nothing, delete it
if (sourceDef.getDef(n).contains(sourceDef.getDef(i)))
{
def = Manipulate.deleteString(def, i);
sourceDef.deleteDef(i);
i--;
continue;
}
sourceDef.addDictToDef(sourceDef.getDef(n), i);
// replace every stand-alone occurrence of def[i] inside def[n]
do
{
def[n] = Manipulate.replace(def[n], pos, posEnd, "*");
pos = def[n].indexOf(def[i]);
posEnd = pos + def[i].length();
} while ((pos==0 || (pos>0 && !Character.isLetter(def[n].charAt(pos-1)))) && (posEnd==def[n].length() || !Character.isLetter(def[n].charAt(posEnd))));
i=-1; // start over
continue;
}
}
// deal with repetition of dictionaries
if (sourceDef.getDef(i).equals(sourceDef.getDef(n)))
{
if (i<n)
{
// same dictionaries: append n's text to i and drop n
def[i] = def[i] + ". " + def[n];
def = Manipulate.deleteString(def, n);
sourceDef.deleteDef(n);
n = i;
continue;
}
else
{
// same dictionaries: append i's text to n and drop i
def[n] = def[n] + ". " + def[i];
def = Manipulate.deleteString(def, i);
sourceDef.deleteDef(i);
}
}
}
}
}
private void addMoreDef(String def, int numDef) private void addMoreDef(String def, int numDef)
{ {
String temp;
boolean notAlreadyThere, changed;
int i, pos, posEnd;
if (this.def==null) if (this.def==null)
{ {
// add a new definition for this dictionary
this.def = new String[1]; this.def = new String[1];
this.def[0] = def; this.def[0] = def;
sourceDef.add(numDef); //sourceDef.add(numDef);
sourceDef.addNewDef(numDef);
} }
else else
{ {
// if the word is repeated in the same dictionary notAlreadyThere = true;
if (sourceDef.contains(numDef)) do
this.def[this.def.length-1] = this.def[this.def.length-1] + ". " + def; {
i=0;
changed = false;
while (notAlreadyThere && i<this.def.length)
{
if (this.def[i].length()>=def.length())
{
pos = this.def[i].indexOf(def);
posEnd = pos + def.length();
if ((pos==0 || (pos>0 && !Character.isLetter(this.def[i].charAt(pos-1)))) && (posEnd==this.def[i].length() || !Character.isLetter(this.def[i].charAt(posEnd))))
{
if (!sourceDef.isDictInDef(numDef, i))
{
if (this.def[i].length()>def.length())
{
//temp = Manipulate.deleteSubstring(this.def[i], pos, posEnd);
temp = this.def[i];
do
{
temp = Manipulate.replace(temp, pos, posEnd, "*");
pos = temp.indexOf(def);
posEnd = pos + def.length();
} while ((pos==0 || (pos>0 && !Character.isLetter(temp.charAt(pos-1)))) && (posEnd==temp.length() || !Character.isLetter(temp.charAt(posEnd))));
this.def[i] = def;
this.def = Manipulate.addString(this.def, temp, i+1);
sourceDef.dubDef(i);
sourceDef.addDictToDef(numDef, i);
reGroup(i);
if (i+1<this.def.length) reGroup(i+1);
else reGroup(this.def.length-1);
}
else sourceDef.addDictToDef(numDef, i);
}
notAlreadyThere = false;
changed = false;
}
}
else else
{ {
int i=0; pos = def.indexOf(this.def[i]);
String newDef[] = new String[this.def.length+1]; posEnd = pos + this.def[i].length();
while(i<this.def.length)
if ((pos==0 || (pos>0 && !Character.isLetter(def.charAt(pos-1)))) && (posEnd==def.length() || !Character.isLetter(def.charAt(posEnd))))
{ {
newDef[i] = this.def[i]; if (sourceDef.isDictInDefAlone(numDef, i))
{
this.def[i] = def;
reGroup(i);
}
else
{
sourceDef.addDictToDef(numDef, i);
do
{
//def = Manipulate.deleteSubstring(def, pos, posEnd);
def = Manipulate.replace(def, pos, posEnd, "*");
pos = def.indexOf(this.def[i]);
posEnd = pos + this.def[i].length();
} while ((pos==0 || (pos>0 && !Character.isLetter(def.charAt(pos-1)))) && (posEnd==def.length() || !Character.isLetter(def.charAt(posEnd))));
}
changed = true;
}
}
i++; i++;
} }
newDef[i] = def; } while (changed);
this.def = newDef;
sourceDef.add(numDef); if (notAlreadyThere)
{
// check if it is a duplicate for the same dictionary.
i = sourceDef.containsAlone(numDef);
if (i>-1)
{
this.def[i] = this.def[i] + ". " + def;
reGroup(i);
}
else
{
this.def = Manipulate.addString(this.def, def, this.def.length);
sourceDef.addNewDef(numDef);
reGroup(this.def.length-1);
}
} }
} }
} }
@ -681,6 +890,12 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
print(); print();
wordRaf.writeInt((int)posHijos); wordRaf.writeInt((int)posHijos);
// write version marker
wordRaf.writeShort(-1);
wordRaf.writeByte(-1);
// write version number
wordRaf.writeByte(versionNumber);
} }
public static void main(String args[]) throws Exception public static void main(String args[]) throws Exception
@ -754,11 +969,14 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
{ {
delimiterType=delimiterDash; delimiterType=delimiterDash;
} }
System.out.println("\nProcessing " + args[i] + "...");
sl.addFile(args[i] + ".txt", delimiterType, delimiter, n); sl.addFile(args[i] + ".txt", delimiterType, delimiter, n);
n++; i++; n++; i++;
} }
} }
} }
System.out.println("Writing to file " + args[a] + "...");
System.out.flush();
sl.generateDatabase(args[a]); sl.generateDatabase(args[a]);
} }
} }

View file

@ -0,0 +1,171 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.io.*;
/** Specifies a subset of dictionaries among a set of
    dictionaries, stored as a bit mask in a single int: bit number n is
    set when dictionary number n is selected. Supports a maximum of 30
    dictionaries (bits 0 to 29); bit 30 is used as the "has brothers" flag
    when the value is written to or read from a file.

    @author Andr&eacute;s Montano Pellegrini
*/
public class BitDictionarySource extends DictionarySource
{
    private int dicts;

    /** Last bit of word; 1 if there are more brothers.*/
    private static final int lastBit=1073741824;

    /** Mask with the bits of all 30 dictionaries set. */
    private static final int allDicts=lastBit-1;

    /** Creates a source with no dictionaries selected. */
    public BitDictionarySource()
    {
        dicts = 0;
    }

    /** Creates a source from a bit mask, as understood by {@link #setDicts(int)}. */
    public BitDictionarySource(int dicts)
    {
        this.dicts = dicts;
    }

    /** Two sources are equal when their bit masks are identical. Returns
        false (instead of throwing) for null or for objects of another class. */
    public boolean equals(Object obj)
    {
        if (this==obj) return true;
        if (!(obj instanceof BitDictionarySource)) return false;
        return this.getDicts()==((BitDictionarySource) obj).getDicts();
    }

    /** Consistent with {@link #equals(Object)}: equal masks give equal hash codes. */
    public int hashCode()
    {
        return dicts;
    }

    /** Returns an instance of DictionarySource with all dictionaries selected */
    public BitDictionarySource getAllDictionaries()
    {
        BitDictionarySource ds = new BitDictionarySource();
        ds.setDicts(allDicts);
        return ds;
    }

    /** Marks all dictionaries as selected */
    public void setAllDictionaries()
    {
        dicts = allDicts;
    }

    /** Assumes dicts is an array of bits, and selects the dictionaries marked by
        each bit. */
    public void setDicts(int dicts)
    {
        this.dicts = dicts;
    }

    /** Returns an array of bits representing the selected dictionaries. */
    public int getDicts()
    {
        return dicts;
    }

    /** Returns the mask that has only bit number n set. */
    private int getBits(int n)
    {
        return 1 << n;
    }

    /** Returns true if dictionary number dict is selected. */
    public boolean contains(int dict)
    {
        // compare against 0 rather than ">0" so the test does not depend on the sign bit
        return (dicts & getBits(dict))!=0;
    }

    /** Returns true if every bit set in <code>dicts</code> is also set in this source. */
    public boolean contains(BitDictionarySource dicts)
    {
        return this.intersection(dicts).equals(dicts);
    }

    /** Marks the dictionary "dict" as selected */
    public void add(int dict)
    {
        dicts|= getBits(dict);
    }

    /** Marks as selected all the dictionaries selected in <code>dicts</code>. */
    public void add(BitDictionarySource dicts)
    {
        this.dicts|= dicts.dicts;
    }

    /** Write to file using BinaryFileGenerator. The mask is written as one int,
        with the "has brothers" bit set when hasNext is true. */
    public void print(boolean hasNext, DataOutput raf) throws IOException
    {
        int numDict;
        if (hasNext) numDict = lastBit | dicts;
        else numDict = dicts;
        raf.writeInt(numDict);
    }

    /** Reads the mask (including the "has brothers" bit) as one int. */
    public void read(DataInput raf) throws IOException
    {
        setDicts(raf.readInt());
    }

    /** Returns true if the "has brothers" bit is set. */
    public boolean hasBrothers()
    {
        return (dicts & lastBit)!=0;
    }

    /** Returns the number of selected dictionaries. */
    public int countDefs()
    {
        int n, source;
        for (n=0, source = dicts & allDicts; source>0; source>>=1)
            if (source%2==1) n++;
        return n;
    }

    /** Returns a new source with the dictionaries selected both here and in dsO.
        dsO must be a BitDictionarySource. */
    public DictionarySource intersection(DictionarySource dsO)
    {
        BitDictionarySource ds = new BitDictionarySource(), dsOB = (BitDictionarySource) dsO;
        ds.setDicts(this.dicts & dsOB.dicts);
        return ds;
    }

    /** Returns an array containing the indexes for the available dictionaries. Use this
        method when you know exactly how many dictionaries there are!
        @param n number of selected dictionaries to return, lowest index first.
        @throws IllegalArgumentException if n is greater than the number of
        selected dictionaries (this used to loop forever). */
    public int[] untangleDefs(int n)
    {
        int arr[], i, pos, source;
        arr = new int[n];
        source = dicts & allDicts;
        for (i=0, pos=0; pos<n; i++, source>>=1)
        {
            if (source==0)
                throw new IllegalArgumentException("Requested " + n + " dictionaries but only " + pos + " are selected");
            if (source%2==1)
                arr[pos++]=i;
        }
        return arr;
    }

    /** Returns an array containing the indexes for the available dictionaries.*/
    public int[] untangleDefs()
    {
        return untangleDefs(countDefs());
    }

    /** Returns true if no dictionary is selected (the "has brothers" bit is ignored). */
    public boolean isEmpty()
    {
        return (dicts & allDicts)==0;
    }

    /** Marks all dictionaries as unselected. */
    public void reset()
    {
        dicts = 0;
    }

    /** Returns the tag of the i-th selected dictionary: its entry in defTags
        or, if no tags were set, its number counting from 1. */
    public String getTag(int i)
    {
        int source[] = this.untangleDefs();
        if (defTags==null) return Integer.toString(source[i]+1);
        return defTags[source[i]];
    }
}

View file

@ -0,0 +1,307 @@
/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;
import java.io.*;
/** Specifies, for each definition of a word, the subset of dictionaries
    that the definition comes from. Unlike {@link BitDictionarySource}, it
    provides the infrastructure to group definitions from various
    dictionaries: it holds one BitDictionarySource per definition.

    When written to a file each value takes one byte, using bit 6 (value 64)
    as a continuation flag, so definition counts and dictionary numbers must
    be below 64.

    @author Andr&eacute;s Montano Pellegrini
*/
public class ByteDictionarySource extends DictionarySource
{
    /** Dictionaries of each definition; null when there are no definitions. */
    private BitDictionarySource dicts[];
    private boolean hasBrother;

    /** Last bit of word; 1 if there are more brothers.*/
    private static final int lastBit = 64;
    private static final int allDicts=lastBit-1;

    /** Creates a source with no definitions. */
    public ByteDictionarySource()
    {
        dicts = null;
        hasBrother = false;
    }

    /** Creates a source over the given array (not copied). */
    public ByteDictionarySource(BitDictionarySource dicts[], boolean hasBrother)
    {
        this.dicts = dicts;
        this.hasBrother = hasBrother;
    }

    /** Inserts newDef as definition number n, shifting the following ones up. */
    public void insertDef(BitDictionarySource newDef, int n)
    {
        BitDictionarySource newDicts[] = new BitDictionarySource[dicts.length+1];
        System.arraycopy(dicts, 0, newDicts, 0, n);
        newDicts[n] = newDef;
        System.arraycopy(dicts, n, newDicts, n+1, dicts.length-n);
        dicts = newDicts;
    }

    /** Removes definition number n, shifting the following ones down. */
    public void deleteDef(int n)
    {
        BitDictionarySource newDicts[] = new BitDictionarySource[dicts.length-1];
        System.arraycopy(dicts, 0, newDicts, 0, n);
        System.arraycopy(dicts, n+1, newDicts, n, dicts.length-n-1);
        dicts = newDicts;
    }

    /** Appends a new definition that belongs only to dictionary dictNum. */
    public void addNewDef(int dictNum)
    {
        if (dicts==null)
        {
            dicts = new BitDictionarySource[1];
        }
        else
        {
            BitDictionarySource newDicts[] = new BitDictionarySource[dicts.length+1];
            System.arraycopy(dicts, 0, newDicts, 0, dicts.length);
            dicts = newDicts;
        }
        dicts[dicts.length-1] = new BitDictionarySource();
        dicts[dicts.length-1].add(dictNum);
    }

    /** Adds dictionary dict to the dictionaries of definition def. */
    public void addDictToDef(int dict, int def)
    {
        dicts[def].add(dict);
    }

    /** Adds all the dictionaries in dicts to the dictionaries of definition def. */
    public void addDictToDef(BitDictionarySource dicts, int def)
    {
        this.dicts[def].add(dicts);
    }

    /** Returns the dictionaries of definition i (the internal object, not a copy). */
    public BitDictionarySource getDef(int i)
    {
        return dicts[i];
    }

    /** Duplicates definition n: a copy of its dictionaries is inserted at
        position n+1 and the following definitions are shifted up. */
    public void dubDef(int n)
    {
        BitDictionarySource newDicts[] = new BitDictionarySource[dicts.length+1];
        System.arraycopy(dicts, 0, newDicts, 0, n+1);
        newDicts[n+1] = new BitDictionarySource(dicts[n].getDicts());
        System.arraycopy(dicts, n+1, newDicts, n+2, dicts.length-n-1);
        dicts = newDicts;
    }

    /** Returns true if definition def comes (among others) from dictionary dict. */
    public boolean isDictInDef (int dict, int def)
    {
        return dicts[def].contains(dict);
    }

    /** Returns true if definition def comes from dictionary dict and from no other. */
    public boolean isDictInDefAlone (int dict, int def)
    {
        return dicts[def].contains(dict) && dicts[def].countDefs()==1;
    }

    /** Returns the index of the first definition that comes only from
        dictionary dict, or -1 if there is none. */
    public int containsAlone(int dict)
    {
        int i;
        if (dicts == null) return -1;
        for (i=0; i<dicts.length; i++)
        {
            if (isDictInDefAlone(dict,i))
                return i;
        }
        return -1;
    }

    /** Write to file using BinaryFileGenerator */
    public void print(boolean hasNext, DataOutput raf) throws IOException
    {
        int i, j, eachDict[], n;

        /* first write how many definitions are, using the first bit to mark
           the brothers of the node. */
        if (dicts==null)
        {
            if (hasNext) raf.writeByte(lastBit);
            else raf.writeByte(0);
            return;
        }
        if (hasNext) n = lastBit | dicts.length;
        else n = dicts.length;
        raf.writeByte(n);

        /* Then write the dictionaries associated with each definition, using the
           first bit to mark for more dicts. */
        for (i=0; i<dicts.length; i++)
        {
            eachDict = dicts[i].untangleDefs();
            n = eachDict.length-1;
            for (j=0; j<n; j++)
                raf.writeByte(lastBit | eachDict[j]);
            raf.writeByte(eachDict[n]);
        }
    }

    /** Reads the information in the format written by print. */
    public void read(DataInput raf) throws IOException
    {
        int i, n;

        n = (int) raf.readByte();
        if ((n & lastBit)>0)
        {
            hasBrother = true;
            n = n & allDicts;
        }
        else hasBrother = false;

        if (n==0)
        {
            dicts = null;
            return;
        }

        dicts = new BitDictionarySource[n];
        for (i=0; i< dicts.length; i++)
        {
            dicts[i] = new BitDictionarySource();
            // each definition is a run of bytes; the flag bit marks that more follow
            do
            {
                n = (int) raf.readByte();
                dicts[i].add(n & allDicts);
            } while((n & lastBit)>0);
        }
    }

    public boolean hasBrothers()
    {
        return this.hasBrother;
    }

    /** Returns true if at least one definition comes from dictionary dict. */
    public boolean contains(int dict)
    {
        int i;
        if (dicts==null) return false;
        for (i=0; i<dicts.length; i++)
            // test the requested dictionary, not the loop index
            if (dicts[i].contains(dict)) return true;
        return false;
    }

    /** Returns the index of the first definition whose dictionaries are
        exactly dict, or -1 if there is none. */
    public int containsDict(BitDictionarySource dict)
    {
        int i;
        if (dicts==null) return -1;
        for (i=0; i<dicts.length; i++)
            if (dicts[i].equals(dict)) return i;
        return -1;
    }

    /** Returns the number of definitions that have at least one dictionary selected. */
    public int countDefs()
    {
        int i, n;
        if (dicts==null) return 0;
        n=0;
        for (i=0; i<dicts.length; i++)
            if (!dicts[i].isEmpty()) n++;
        return n;
    }

    /** Discards all definitions. The "has brother" flag is left untouched. */
    public void reset()
    {
        dicts = null;
    }

    /** Returns true if definition def has no dictionary selected. */
    public boolean isEmpty(int def)
    {
        return dicts[def].isEmpty();
    }

    /** Returns true if no definition has a dictionary selected. */
    public boolean isEmpty()
    {
        int i;
        if (dicts == null) return true;
        for (i=0; i< dicts.length; i++)
            if (!dicts[i].isEmpty()) return false;
        return true;
    }

    /** Returns a new source where the dictionaries of each definition are
        restricted to those also selected in dsO, which must be a
        BitDictionarySource. The number and order of definitions is kept, so
        some of them may end up empty. */
    public DictionarySource intersection(DictionarySource dsO)
    {
        BitDictionarySource newDicts[], dsOB;
        int i;

        // no definitions (e.g. after reset()): the intersection is empty as well
        if (dicts==null) return new ByteDictionarySource(null, hasBrother);

        newDicts = new BitDictionarySource[dicts.length];
        dsOB = (BitDictionarySource) dsO;
        for (i=0; i<dicts.length; i++)
            newDicts[i] = (BitDictionarySource) dicts[i].intersection(dsOB);
        return new ByteDictionarySource(newDicts, hasBrother);
    }

    /** Returns the tags of all the dictionaries of definition n, separated by
        ", ". When no tags were set, dictionary numbers counting from 1 are
        used. Definition n must have at least one dictionary selected. */
    public String getTag(int n)
    {
        int i, source[] = dicts[n].untangleDefs();
        StringBuffer tag = new StringBuffer();

        if (defTags==null) tag.append(Integer.toString(source[0]+1));
        else tag.append(defTags[source[0]]);

        for (i=1; i<source.length; i++)
        {
            tag.append(", ");
            if (defTags==null) tag.append(Integer.toString(source[i]+1));
            else tag.append(defTags[source[i]]);
        }
        return tag.toString();
    }
}

View file

@ -42,18 +42,21 @@ public class CachedSyllableListTree implements SyllableListTree
long pos, defSources[]; long pos, defSources[];
DictionarySource sourceDef; DictionarySource sourceDef;
int i; int i;
FileSyllableListTree.openFiles(archivo);
long posLista = FileSyllableListTree.wordRaf.length() - 4;
SimplifiedLinkedList syllables = new SimplifiedLinkedList();
FileSyllableListTree.wordRaf.seek(posLista); FileSyllableListTree.openFiles(archivo);
posLista = (long) FileSyllableListTree.wordRaf.readInt();
FileSyllableListTree.wordRaf.seek(posLista); SimplifiedLinkedList syllables = new SimplifiedLinkedList();
do do
{ {
// get "link" to children
pos = (long) FileSyllableListTree.wordRaf.readInt(); pos = (long) FileSyllableListTree.wordRaf.readInt();
// get syllable
sil = FileSyllableListTree.wordRaf.readUTF(); sil = FileSyllableListTree.wordRaf.readUTF();
sourceDef = DictionarySource.read(FileSyllableListTree.wordRaf); // get dictionary information for each definition
if (FileSyllableListTree.versionNumber==2) sourceDef = new BitDictionarySource();
else sourceDef = new ByteDictionarySource();
sourceDef.read(FileSyllableListTree.wordRaf);
if (sourceDef.isEmpty()) defSources = null; if (sourceDef.isEmpty()) defSources = null;
else else
{ {
@ -88,9 +91,13 @@ public class CachedSyllableListTree implements SyllableListTree
public DictionarySource getDictionarySource() public DictionarySource getDictionarySource()
{ {
return FileSyllableListTree.defSourcesWanted; return null;
} }
public BitDictionarySource getDictionarySourcesWanted()
{
return FileSyllableListTree.defSourcesWanted;
}
public boolean hasDef() public boolean hasDef()
{ {

View file

@ -28,20 +28,9 @@ package org.thdl.tib.scanner;
public class Definitions public class Definitions
{ {
public String[] def; public String[] def;
public int[] source; private DictionarySource source;
public static String[] defTags;
static public Definitions(String[] def, DictionarySource source)
{
defTags=null;
}
public static void setTags(String tags[])
{
defTags = tags;
}
public Definitions(String[] def, int[] source)
{ {
this.def = def; this.def = def;
this.source = source; this.source = source;
@ -54,24 +43,53 @@ public class Definitions
this.def[0] = def; this.def[0] = def;
} }
public String getTag(int i) public String getPreview()
{ {
if (source==null) return null; String s;
if (defTags==null) return Integer.toString(source[i]+1); int i;
return defTags[source[i]];
if (def==null) return "";
s = def[0];
for (i=1; i<def.length; i++)
s = s + ". " + def[i];
return s;
} }
public String toString() public String toString()
{ {
int i; int i,j;
String s; String s;
if (def==null) return null; if (def==null) return null;
if (source==null) return def[0]; if (source==null) return def[0];
s = "(" + getTag(0) + ") " + def[0]; if (FileSyllableListTree.versionNumber==2)
{
s = "(" + source.getTag(0) + ") " + def[0];
for (i=1; i<def.length; i++) for (i=1; i<def.length; i++)
s += "\n" + "(" + getTag(i) + ") " + def[i]; s += "\n" + "(" + source.getTag(i) + ") " + def[i];
}
else
{
ByteDictionarySource sourceb = (ByteDictionarySource) source;
j=0;
while (sourceb.isEmpty(j)) j++;
s = "(" + sourceb.getTag(j) + ") " + def[0];
for (i=1; i<def.length; i++)
{
j++;
while (sourceb.isEmpty(j)) j++;
s += "\n" + "(" + sourceb.getTag(j) + ") " + def[i];
}
}
return s; return s;
} }
public DictionarySource getDictionarySource()
{
return source;
}
} }

View file

@ -15,123 +15,54 @@ Pellegrini. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
*/ */
package org.thdl.tib.scanner;
package org.thdl.tib.scanner;
import java.io.*; import java.io.*;
/** Specifies a subset of dictionaries among a set of /** Specifies a generic interface to access and process a subset of
dictionaries. Supports a maximum of 30 dictionaries. dictionaries among a set of dictionaries.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
*/ */
public class DictionarySource public abstract class DictionarySource
{ {
private int dicts; public static String[] defTags;
static
/** Last bit of word; 1 if there are more brothers.*/
private static final int lastBit=1073741824;
private static final int allDicts=lastBit-1;
public DictionarySource()
{ {
dicts = 0; defTags=null;
} }
public static DictionarySource getAllDictionaries() public static void setTags(String tags[])
{ {
DictionarySource ds = new DictionarySource(); defTags = tags;
ds.setDicts(allDicts);
return ds;
} }
public void setAllDictionaries() /** Get the tag or tags associated to definition number i. */
{ public abstract String getTag(int i);
dicts = allDicts;
}
public void setDicts(int dicts) /** Marks all dictionaries as unselected */
{ public abstract void reset();
this.dicts = dicts;
}
public int getDicts() /** Returns an instance of DictionarySource marking as selected all dictionaries
{ that were selected in both the current and dsO. */
return dicts; public abstract DictionarySource intersection(DictionarySource dsO);
}
private int getBits(int n) /** Returns true if no dictionaries are selected. */
{ public abstract boolean isEmpty();
return 1 << n;
}
public boolean contains(int dict) /** Writes the dictionary information to a random access file. */
{ public abstract void print(boolean hasNext, DataOutput raf) throws IOException;
return (dicts & getBits(dict))>0;
}
public void add(int dict) /** Reads the dictionary information from a random access file, according
{ to the way it was written with @print. */
dicts|= getBits(dict); public abstract void read(DataInput raf) throws IOException;
}
/** Write to file using BinaryFileGenerator */ /** Returns the number of definitions available. */
public void print(boolean hasNext, DataOutput raf) throws IOException public abstract int countDefs();
{
int numDict;
if (hasNext) numDict = lastBit | dicts;
else numDict = dicts;
raf.writeInt(numDict);
}
public static DictionarySource read(DataInput raf) throws IOException /** Returns true if the node has brothers. This is used by @FileSyllableListTree. */
{ public abstract boolean hasBrothers();
DictionarySource ds = new DictionarySource();
ds.setDicts(raf.readInt());
return ds;
}
public boolean hasBrothers() /** Returns true if dict is a selected dictionary. */
{ public abstract boolean contains(int dict);
return (dicts & lastBit)>0;
}
public int countDefs()
{
int n, source;
for (n=0, source = dicts & allDicts; source>0; source>>=1)
if (source%2==1) n++;
return n;
}
public DictionarySource intersection(DictionarySource dsO)
{
DictionarySource ds = new DictionarySource();
ds.setDicts(this.dicts & dsO.dicts);
return ds;
}
public int[] untangleDefs(int n)
{
int arr[], i, pos, source;
arr = new int[n];
for (i=0, pos=0, source=dicts & allDicts; pos<n; i++, source>>=1)
if (source%2==1)
arr[pos++]=i;
return arr;
}
public int[] untangleDefs()
{
return untangleDefs(countDefs());
}
public boolean isEmpty()
{
return (dicts & allDicts)<=0;
}
public void reset()
{
dicts = 0;
}
} }

View file

@ -64,7 +64,7 @@ public class DictionaryTableModel extends AbstractTableModel
case 0: case 0:
if (tibetanActivated) return arrayTibetan[row]; if (tibetanActivated) return arrayTibetan[row];
else return array[row].getWylie(); else return array[row].getWylie();
case 1: return array[row].getDef(); case 1: return array[row].getDefPreview();
default: return array[row].toString(); default: return array[row].toString();
} }
} }

View file

@ -38,20 +38,24 @@ public class FileSyllableListTree implements SyllableListTree
private long def[]; private long def[];
private long posLista; private long posLista;
private DictionarySource defSource; private DictionarySource defSource;
public static DictionarySource defSourcesWanted; public static BitDictionarySource defSourcesWanted;
public static RandomAccessFile wordRaf=null; public static RandomAccessFile wordRaf=null;
private static RandomAccessFile defRaf=null; private static RandomAccessFile defRaf=null;
public static int versionNumber;
/** Creates the root */ /** Creates the root */
public FileSyllableListTree(String archivo, int defSourcesWanted) throws Exception public FileSyllableListTree(String archivo, int defSourcesWanted) throws Exception
{ {
sil = null; sil = null;
def = null; def = null;
this.defSource = new DictionarySource(); defSource = null;
openFiles(archivo);
posLista = wordRaf.length() - 4; this.openFiles(archivo);
wordRaf.seek(posLista); posLista = this.wordRaf.getFilePointer();
posLista = (long)wordRaf.readInt();
/* if versionNumber is 2 use BitDictionarySource
else use ByteDictionarySource. */
this.defSourcesWanted.setDicts(defSourcesWanted);
} }
/** Used to create each node (except the root) /** Used to create each node (except the root)
@ -74,11 +78,60 @@ public class FileSyllableListTree implements SyllableListTree
return defSource; return defSource;
} }
/** Returns the selection of dictionaries wanted when definitions are
    retrieved. The selection is held in a static field, so it is shared
    by every node of the tree. */
public BitDictionarySource getDictionarySourcesWanted()
{
    return FileSyllableListTree.defSourcesWanted;
}
public static void openFiles(String archivo) throws Exception public static void openFiles(String archivo) throws Exception
{ {
long fileSize;
int pos;
wordRaf = new RandomAccessFile(archivo + ".wrd", "r"); wordRaf = new RandomAccessFile(archivo + ".wrd", "r");
defRaf = new RandomAccessFile(archivo + ".def", "r"); defRaf = new RandomAccessFile(archivo + ".def", "r");
defSourcesWanted = DictionarySource.getAllDictionaries();
fileSize = wordRaf.length();
wordRaf.seek(fileSize-4L);
pos = wordRaf.readInt();
if (pos >> 8 == -1)
{
versionNumber = pos & 255;
// for now, only version 2 & 3 should be expected
if (versionNumber != 3) versionNumber=2;
wordRaf.seek(fileSize-8L);
pos = wordRaf.readInt();
}
else
{
// Updates the dictionary for backward compatibility.
try
{
wordRaf.close();
wordRaf = new RandomAccessFile(archivo + ".wrd", "rw");
wordRaf.seek(fileSize);
wordRaf.writeShort(-1);
wordRaf.writeByte(-1);
// Because it didn't have a version number, must be version 2.
versionNumber = 2;
wordRaf.writeByte(versionNumber);
wordRaf.close();
wordRaf = new RandomAccessFile(archivo + ".wrd", "r");
}
catch (Exception e)
{
// dictionary is stored on a non-writable media. Do nothing.
}
}
/* if versionNumber is 2 use BitDictionarySource else use
ByteDictionarySource. */
defSourcesWanted = new BitDictionarySource();
wordRaf.seek(pos);
} }
public String getDef() public String getDef()
@ -90,12 +143,14 @@ public class FileSyllableListTree implements SyllableListTree
{ {
if (def==null) return null; if (def==null) return null;
DictionarySource defSourceAvail = defSource.intersection(defSourcesWanted); DictionarySource defSourceAvail = defSource.intersection(defSourcesWanted);
String defs[];
int defsAvail[] = defSourceAvail.untangleDefs(), defsFound[] = defSource.untangleDefs(def.length);
String defs[] = new String[defsAvail.length];
int i, n=0; int i, n=0;
if (versionNumber==2)
{
int defsAvail[] = ((BitDictionarySource) defSourceAvail).untangleDefs(), defsFound[] = ((BitDictionarySource) defSource).untangleDefs(def.length);
defs = new String[defsAvail.length];
try try
{ {
for (i=0; i<defsAvail.length; i++) for (i=0; i<defsAvail.length; i++)
@ -110,7 +165,31 @@ public class FileSyllableListTree implements SyllableListTree
System.out.println(e); System.out.println(e);
return null; return null;
} }
return new Definitions(defs, defsAvail); }
else
{
ByteDictionarySource defSourceAvailBy = (ByteDictionarySource) defSourceAvail;
defs = new String [defSourceAvailBy.countDefs()];
try
{
for (i=0; i < def.length; i++)
{
if (!defSourceAvailBy.isEmpty(i))
{
defRaf.seek(def[i]);
defs[n] = defRaf.readUTF();
n++;
}
}
}
catch (Exception e)
{
System.out.println(e);
return null;
}
}
return new Definitions(defs, defSourceAvail);
} }
public boolean hasDef() public boolean hasDef()
@ -125,6 +204,7 @@ public class FileSyllableListTree implements SyllableListTree
String sil; String sil;
long pos, defSource[]; long pos, defSource[];
DictionarySource sourceDef; DictionarySource sourceDef;
int i; int i;
if (silStr==null) return null; if (silStr==null) return null;
@ -135,7 +215,11 @@ public class FileSyllableListTree implements SyllableListTree
{ {
pos = (long) wordRaf.readInt(); pos = (long) wordRaf.readInt();
sil = wordRaf.readUTF(); sil = wordRaf.readUTF();
sourceDef = DictionarySource.read(wordRaf);
if (versionNumber==2) sourceDef = new BitDictionarySource();
else sourceDef = new ByteDictionarySource();
sourceDef.read(wordRaf);
if (sourceDef.isEmpty()) defSource = null; if (sourceDef.isEmpty()) defSource = null;
else else
{ {

View file

@ -43,9 +43,9 @@ public class LocalTibetanScanner extends TibetanScanner
archivo = null; archivo = null;
} }
public DictionarySource getDictionarySource() public BitDictionarySource getDictionarySource()
{ {
return raiz.getDictionarySource(); return raiz.getDictionarySourcesWanted();
} }
@ -401,7 +401,7 @@ outAHere:
ll2.addLast(s.substring(n+1).trim()); ll2.addLast(s.substring(n+1).trim());
} }
} }
Definitions.defTags = ll2.toStringArray(); DictionarySource.setTags(ll2.toStringArray());
return ll1.toStringArray(); return ll1.toStringArray();
} }
catch (Exception e) catch (Exception e)

View file

@ -55,6 +55,78 @@ public class Manipulate
return linea; return linea;
} }
/** Removes the characters in the range [pos, posEnd) from string and
    trims leading and trailing whitespace from what is left.

    @param string the text to cut from.
    @param pos index of the first character to remove. If negative (for
    instance the result of a failed indexOf), string is returned
    unchanged and untrimmed.
    @param posEnd index just past the last character to remove. Values
    beyond the end of string are treated as the end of string.
    @return the trimmed remainder, or string itself when pos is negative.
*/
public static String deleteSubstring (String string, int pos, int posEnd)
{
    if (pos < 0) return string;

    // Clamp the end once, so that a range running past the end of the
    // string is handled the same way whether or not it starts at index 0
    // (previously the pos==0 case threw StringIndexOutOfBoundsException).
    int end = Math.min(posEnd, string.length());

    return string.substring(0, pos).concat(string.substring(end)).trim();
}
/** Replaces the characters in the range [pos, posEnd) of string with
    newSub and trims leading and trailing whitespace from the result.

    @param string the text to modify.
    @param pos index of the first character to replace. If negative (for
    instance the result of a failed indexOf), string is returned
    unchanged and untrimmed.
    @param posEnd index just past the last character to replace. Values
    beyond the end of string are treated as the end of string.
    @param newSub the text inserted in place of the removed range.
    @return the trimmed result, or string itself when pos is negative.
*/
public static String replace(String string, int pos, int posEnd, String newSub)
{
    if (pos < 0) return string;

    // Clamp the end once, so that a range running past the end of the
    // string is handled the same way whether or not it starts at index 0
    // (previously the pos==0 case threw StringIndexOutOfBoundsException).
    int end = Math.min(posEnd, string.length());

    return string.substring(0, pos).concat(newSub).concat(string.substring(end)).trim();
}
/** Removes the first occurrence of sub from string by delegating to
    deleteSubstring(String, int, int). When sub does not occur, indexOf
    yields -1 and string comes back unchanged.

    @param string the text to cut from.
    @param sub the text to remove.
*/
public static String deleteSubstring (String string, String sub)
{
    final int start = string.indexOf(sub);
    final int stop = start + sub.length();

    return deleteSubstring(string, start, stop);
}
/** Returns a new array, one element longer than array, with s inserted
    at index n. Elements before n keep their index; elements from n
    onwards are shifted one position to the right. The input array is
    not modified.

    @param array the original elements.
    @param s the string to insert.
    @param n the index at which s is placed (0 to array.length).
*/
public static String[] addString(String array[], String s, int n)
{
    String grown[] = new String[array.length + 1];

    // head: everything before the insertion point stays in place
    System.arraycopy(array, 0, grown, 0, n);
    grown[n] = s;
    // tail: everything from the insertion point moves one slot right
    System.arraycopy(array, n, grown, n + 1, array.length - n);

    return grown;
}
/** Returns a new array, one element shorter than array, without the
    element at index n. Elements before n keep their index; elements
    after n are shifted one position to the left. The input array is
    not modified.

    @param array the original elements.
    @param n the index of the element to drop (0 to array.length-1).
*/
public static String[] deleteString(String array[], int n)
{
    String shrunk[] = new String[array.length - 1];

    // head: everything before the removed element stays in place
    System.arraycopy(array, 0, shrunk, 0, n);
    // tail: everything after the removed element moves one slot left
    System.arraycopy(array, n + 1, shrunk, n, shrunk.length - n);

    return shrunk;
}
public static boolean isVowel (char ch) public static boolean isVowel (char ch)
{ {
ch = Character.toLowerCase(ch); ch = Character.toLowerCase(ch);

View file

@ -52,6 +52,14 @@ public class MemorySyllableListTree extends SimplifiedLinkedList implements Syll
return null; return null;
} }
/** Always returns null, because this tree implementation does not
    support multiple dictionaries and therefore has no selection of
    wanted dictionaries to report. Callers must be ready for null.
*/
public BitDictionarySource getDictionarySourcesWanted()
{
return null;
}
public MemorySyllableListTree(String sil, String def) public MemorySyllableListTree(String sil, String def)
{ {

View file

@ -57,8 +57,7 @@ public class OnLineScannerFilter extends HttpServlet
} }
synchronized public void doGet(HttpServletRequest request, synchronized public void doGet(HttpServletRequest request,
HttpServletResponse response) HttpServletResponse response) throws IOException, ServletException
throws IOException, ServletException
{ {
String answer, parrafo = null, checkboxName; String answer, parrafo = null, checkboxName;
@ -69,7 +68,7 @@ public class OnLineScannerFilter extends HttpServlet
response.setContentType("text/html"); response.setContentType("text/html");
PrintWriter out = response.getWriter(); PrintWriter out = response.getWriter();
DictionarySource ds=null; BitDictionarySource ds=null;
boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null); boolean checkedDicts[], allUnchecked, wantsTibetan, useTHDLBanner = (request.getParameter("thdlBanner")!=null);
// int percent=100; // int percent=100;
@ -225,14 +224,15 @@ public class OnLineScannerFilter extends HttpServlet
ds.add(i); ds.add(i);
} }
if (dictionaries[i]!=null) if (dictionaries[i]!=null)
out.print(">" + dictionaries[i] + " (" + Definitions.defTags[i] + ")&nbsp;&nbsp;&nbsp;"); out.print(">" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ")&nbsp;&nbsp;&nbsp;");
else else
out.print(">" + Definitions.defTags[i] + "&nbsp;&nbsp;&nbsp;"); out.print(">" + DictionarySource.defTags[i] + "&nbsp;&nbsp;&nbsp;");
// out.println(" + "</td>"); // out.println(" + "</td>");
} }
// out.println(" </tr>"); // out.println(" </tr>");
} }
else ds = DictionarySource.getAllDictionaries(); // fix for updates
else ds = new BitDictionarySource().getAllDictionaries();
// out.println("</table>"); // out.println("</table>");
out.println("</p>"); out.println("</p>");
out.println("<table border=\"0\" width=\"100%\">"); out.println("<table border=\"0\" width=\"100%\">");
@ -300,36 +300,40 @@ public class OnLineScannerFilter extends HttpServlet
init = fin+1; init = fin+1;
} */ } */
scanner.clearTokens();
scanner.scanBody(in); scanner.scanBody(in);
scanner.finishUp(); scanner.finishUp();
words = scanner.getTokenArray(); printText(pw, tibetan);
printText(pw, words, tibetan); printAllDefs(pw, tibetan);
printAllDefs(pw, words, tibetan);
scanner.clearTokens(); scanner.clearTokens();
} }
} }
public void printText(PrintWriter pw, Object words[], boolean tibetan) public void printText(PrintWriter pw, boolean tibetan)
{ {
Token token; Token words[] = scanner.getTokenArray();
Word word; Word word;
char pm; char pm;
int i; int i;
if (words==null) return;
pw.print("<p>"); pw.print("<p>");
for (i=0; i < words.length; i++) for (i=0; i < words.length; i++)
{ {
token = (Token) words[i];
if (token instanceof Word) if (words[i] instanceof Word)
{ {
word = (Word) token; word = (Word) words[i];
if (word.getDefs().getDictionarySource()!=null)
pw.print(word.getLink()); pw.print(word.getLink());
else pw.print(word.getWylie() + " ");
} }
else else
{ {
if (token instanceof PunctuationMark) if (words[i] instanceof PunctuationMark)
{ {
pm = token.toString().charAt(0); pm = words[i].toString().charAt(0);
switch (pm) switch (pm)
{ {
case '\n': case '\n':
@ -352,53 +356,49 @@ public class OnLineScannerFilter extends HttpServlet
pw.println("</p>"); pw.println("</p>");
} }
public void printAllDefs(PrintWriter pw, Object words[], boolean tibetan) public void printAllDefs(PrintWriter pw, boolean tibetan)
{ {
SimplifiedLinkedList temp = new SimplifiedLinkedList(); int i, j;
int i; Word words[];
Word word;
Definitions defs; Definitions defs;
for (i=words.length-1; i >= 0; i--)
{
if (words[i] instanceof Word)
{
if (!temp.contains(words[i]))
{
temp.addLast(words[i]);
}
}
}
SimplifiedListIterator li = temp.listIterator();
String tag; String tag;
DictionarySource ds;
words = scanner.getWordArray(false);
if (words == null) return;
pw.println("<table border=\"1\" width=\"100%\">"); pw.println("<table border=\"1\" width=\"100%\">");
while (li.hasNext())
for (j=0; j<words.length; j++)
{ {
word = (Word)li.next(); defs = words[j].getDefs();
defs = word.getDefs(); ds = defs.getDictionarySource();
if (ds==null) continue;
pw.println(" <tr>"); pw.println(" <tr>");
tag = defs.getTag(0); tag = ds.getTag(0);
if (tag!=null) // else tag = null;
{ /*if (tag!=null)
pw.println(" <td width=\"20%\" rowspan=\""+ defs.def.length +"\" valign=\"top\">"+ word.getBookmark(tibetan) +"</td>"); {*/
pw.println(" <td width=\"5%\">"+ tag +"</td>"); pw.println(" <td width=\"20%\" rowspan=\""+ defs.def.length +"\" valign=\"top\">"+ words[j].getBookmark(tibetan) +"</td>");
pw.println(" <td width=\"75%\">" + defs.def[0] + "</td>"); pw.println(" <td width=\"12%\">"+ tag +"</td>");
} pw.println(" <td width=\"68%\">" + defs.def[0] + "</td>");
/*}
else else
{ {
pw.println(" <td width=\"20%\" rowspan=\""+ defs.def.length +"\" valign=\"top\">"+ word.getBookmark(tibetan) +"</td>"); pw.println(" <td width=\"20%\" rowspan=\""+ defs.def.length +"\" valign=\"top\">"+ words[j].getBookmark(tibetan) +"</td>");
pw.println(" <td width=\"80%\" colspan=\"2\">" + defs.def[0] + "</td>"); pw.println(" <td width=\"80%\" colspan=\"2\">" + defs.def[0] + "</td>");
} }*/
pw.println(" </tr>"); pw.println(" </tr>");
for (i=1; i<defs.def.length; i++) for (i=1; i<defs.def.length; i++)
{ {
pw.println(" <tr>"); pw.println(" <tr>");
tag = defs.getTag(i); if (ds!=null) tag = ds.getTag(i);
else tag = null;
if (tag!=null) if (tag!=null)
{ {
pw.println(" <td width=\"5%\">"+ tag +"</td>"); pw.println(" <td width=\"12%\">"+ tag +"</td>");
pw.println(" <td width=\"75%\">" + defs.def[i] + "</td>"); pw.println(" <td width=\"68%\">" + defs.def[i] + "</td>");
} }
else pw.println(" <td width=\"80%\" colspan=\"2\">" + defs.def[i] + "</td>"); else pw.println(" <td width=\"80%\" colspan=\"2\">" + defs.def[i] + "</td>");
pw.println(" </tr>"); pw.println(" </tr>");

View file

@ -35,7 +35,7 @@ import javax.servlet.http.*;
public class RemoteScannerFilter extends GenericServlet public class RemoteScannerFilter extends GenericServlet
{ {
private TibetanScanner scanner; private TibetanScanner scanner;
private DictionarySource ds; private BitDictionarySource ds;
public RemoteScannerFilter() throws Exception public RemoteScannerFilter() throws Exception
{ {
@ -53,6 +53,7 @@ public class RemoteScannerFilter extends GenericServlet
PrintWriter out = res.getWriter(); PrintWriter out = res.getWriter();
int i; int i;
String linea, dicts = req.getParameter("dicts"), dicDescrip[]; String linea, dicts = req.getParameter("dicts"), dicDescrip[];
if (dicts!=null) if (dicts!=null)
{ {
if (dicts.equals("names")) if (dicts.equals("names"))
@ -66,7 +67,7 @@ public class RemoteScannerFilter extends GenericServlet
for (i=0; i<dicDescrip.length; i++) for (i=0; i<dicDescrip.length; i++)
{ {
out.println(dicDescrip[i] + "," + Definitions.defTags[i]); out.println(dicDescrip[i] + "," + DictionarySource.defTags[i]);
} }
out.close(); out.close();
return; return;

View file

@ -31,12 +31,12 @@ import java.io.*;
public class RemoteTibetanScanner extends TibetanScanner public class RemoteTibetanScanner extends TibetanScanner
{ {
private String url; private String url;
private DictionarySource defSourcesWanted; private BitDictionarySource defSourcesWanted;
public RemoteTibetanScanner(String url) throws Exception public RemoteTibetanScanner(String url) throws Exception
{ {
super(); super();
defSourcesWanted = DictionarySource.getAllDictionaries(); defSourcesWanted = new BitDictionarySource().getAllDictionaries();
this.url = url; this.url = url;
} }
@ -95,7 +95,7 @@ public class RemoteTibetanScanner extends TibetanScanner
{ {
} }
public DictionarySource getDictionarySource() public BitDictionarySource getDictionarySource()
{ {
return defSourcesWanted; return defSourcesWanted;
} }
@ -129,7 +129,7 @@ public class RemoteTibetanScanner extends TibetanScanner
} }
} }
br.close(); br.close();
Definitions.defTags = ll2.toStringArray(); DictionarySource.setTags(ll2.toStringArray());
return ll1.toStringArray(); return ll1.toStringArray();
} }
catch (Exception e) catch (Exception e)

View file

@ -128,9 +128,9 @@ public abstract class ScannerPanel extends Panel implements ActionListener
for (i=0; i<dictionaries.length; i++) for (i=0; i<dictionaries.length; i++)
{ {
if (dictionaries[i]!=null) if (dictionaries[i]!=null)
chkDicts[i] = new Checkbox(dictionaries[i] + " (" + Definitions.defTags[i] + ")", true); chkDicts[i] = new Checkbox(dictionaries[i] + " (" + DictionarySource.defTags[i] + ")", true);
else else
chkDicts[i] = new Checkbox(Definitions.defTags[i], true); chkDicts[i] = new Checkbox(DictionarySource.defTags[i], true);
panel2.add(chkDicts[i]); panel2.add(chkDicts[i]);
} }
return panel2; return panel2;
@ -153,7 +153,7 @@ public abstract class ScannerPanel extends Panel implements ActionListener
status.setText("Finishing..."); status.setText("Finishing...");
} }
protected void setDicts(DictionarySource ds) protected void setDicts(BitDictionarySource ds)
{ {
if (chkDicts==null) if (chkDicts==null)
{ {

View file

@ -37,13 +37,14 @@ public class SimpleScannerPanel extends ScannerPanel implements ItemListener
private List listDefs; private List listDefs;
private Word wordArray[]; private Word wordArray[];
private int lenPreview; private int lenPreview;
private static int WIDTH_PORTRAIT = 34; private static int WIDTH_PORTRAIT = 36;
private static int WIDTH_LANDSCAPE = 46; private static int WIDTH_LANDSCAPE = 48;
public SimpleScannerPanel(String file, boolean landscape) public SimpleScannerPanel(String file, boolean landscape)
{ {
super(file); super(file);
Panel panel1, panel2; Panel panel1, panel2;
Font f;
cardPanel = new Panel(new CardLayout()); cardPanel = new Panel(new CardLayout());
// FIXME values shouldn't be hardwired // FIXME values shouldn't be hardwired
@ -53,22 +54,44 @@ public class SimpleScannerPanel extends ScannerPanel implements ItemListener
// panel1 = new Panel(new GridLayout(3, 1)); // panel1 = new Panel(new GridLayout(3, 1));
panel1 = new Panel(new BorderLayout());
// txtInput = new TextArea("",1,1,TextArea.SCROLLBARS_VERTICAL_ONLY); // txtInput = new TextArea("",1,1,TextArea.SCROLLBARS_VERTICAL_ONLY);
if (landscape) txtInput = new TextArea("", 3, WIDTH_LANDSCAPE, TextArea.SCROLLBARS_VERTICAL_ONLY);
else txtInput = new TextArea("", 4, WIDTH_PORTRAIT, TextArea.SCROLLBARS_VERTICAL_ONLY); panel1 = new Panel(new BorderLayout());
//panel1.add(txtInput); panel2 = new Panel(new GridLayout(2, 1));
panel1.add(txtInput, BorderLayout.NORTH);
listDefs = new List(); listDefs = new List();
if (landscape)
{
txtInput = new TextArea("", 0, 0, TextArea.SCROLLBARS_VERTICAL_ONLY);
txtOutput = new TextArea("", 4, WIDTH_LANDSCAPE,TextArea.SCROLLBARS_VERTICAL_ONLY);
panel2.add(txtInput);
panel2.add(listDefs);
panel1.add(panel2, BorderLayout.CENTER);
panel1.add(txtOutput, BorderLayout.SOUTH);
}
else
{
txtInput = new TextArea("", 4, WIDTH_PORTRAIT, TextArea.SCROLLBARS_VERTICAL_ONLY);
txtOutput = new TextArea("",0, 0,TextArea.SCROLLBARS_VERTICAL_ONLY);
panel2.add(listDefs);
panel2.add(txtOutput);
panel1.add(txtInput, BorderLayout.NORTH);
panel1.add(panel2, BorderLayout.CENTER);
}
listDefs.setMultipleMode(false); listDefs.setMultipleMode(false);
listDefs.addItemListener(this); listDefs.addItemListener(this);
panel2 = new Panel(new GridLayout(2,1));
panel2.add(listDefs);
txtOutput = new TextArea("",0,0,TextArea.SCROLLBARS_VERTICAL_ONLY);
txtOutput.setEditable(false); txtOutput.setEditable(false);
panel2.add(txtOutput);
panel1.add(panel2, BorderLayout.CENTER); /*f = new Font(null, Font.PLAIN, 10);
txtOutput.setFont(f);
txtInput.setFont(f);*/
cardPanel.add(panel1, "1"); cardPanel.add(panel1, "1");
// FIXME: values shouldn't be hardwired // FIXME: values shouldn't be hardwired
@ -113,8 +136,7 @@ public class SimpleScannerPanel extends ScannerPanel implements ItemListener
for(i=0; i<wordArray.length; i++) for(i=0; i<wordArray.length; i++)
{ {
preview = wordArray[i].getWordDefPreview();
preview = wordArray[i].toString();
if (preview.length()>lenPreview) preview = preview.substring(0,lenPreview); if (preview.length()>lenPreview) preview = preview.substring(0,lenPreview);
listDefs.add(preview); listDefs.add(preview);
} }

View file

@ -39,5 +39,7 @@ public interface SyllableListTree
public Definitions getDefs(); public Definitions getDefs();
public boolean hasDef(); public boolean hasDef();
public SyllableListTree lookUp(String silStr); public SyllableListTree lookUp(String silStr);
public DictionarySource getDictionarySource(); public DictionarySource getDictionarySource();
public BitDictionarySource getDictionarySourcesWanted();
} }

View file

@ -25,7 +25,7 @@ import org.thdl.util.*;
*/ */
public abstract class TibetanScanner public abstract class TibetanScanner
{ {
public static final String version = "The Tibetan to English Translation Tool, version 2.2.2 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; public static final String version = "The Tibetan to English Translation Tool, version 3.0.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2004 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2004 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2004 by Andres Montano Pellegrini, all rights reserved."; public static final String copyrightASCII="Copyright 2000-2004 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2004 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>"; public static final String copyrightHTML="<hr><small><strong>" + version + "Copyright &copy; 2000-2004 by <a href=\"http://www.people.virginia.edu/~am2zb/\" target=\"_blank\">Andr&eacute;s Montano Pellegrini.</a><br/>All rights reserved.</strong></small>";
@ -293,6 +293,7 @@ public abstract class TibetanScanner
public Token[] getTokenArray() public Token[] getTokenArray()
{ {
int n=wordList.size(); int n=wordList.size();
if (n==0) return null;
Token token[] = new Token[n]; Token token[] = new Token[n];
SimplifiedListIterator li = wordList.listIterator(); SimplifiedListIterator li = wordList.listIterator();
while(li.hasNext()) while(li.hasNext())
@ -306,35 +307,76 @@ public abstract class TibetanScanner
} }
public Word[] getWordArray() public Word[] getWordArray()
{
return getWordArray(true);
}
public Word[] getWordArray(boolean includeRepeated)
{ {
Token token; Token token;
Word array[]; Word array[], word;
int n=0; int n=0;
SimplifiedListIterator li = wordList.listIterator(); SimplifiedListIterator li = wordList.listIterator();
SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList();
while(li.hasNext()) while(li.hasNext())
if (li.next() instanceof Word) n++; {
token = (Token) li.next();
if (token instanceof Word)
{
ll.addLast(token);
}
}
if (includeRepeated)
{
n = ll.size();
if (n==0) return null; if (n==0) return null;
array = new Word[n]; array = new Word[n];
n--; li = ll.listIterator();
li = wordList.listIterator();
n=0;
while (li.hasNext())
{
array[n++] = (Word) li.next();
}
}
else
{
ll2 = new SimplifiedLinkedList();
li = ll.listIterator();
while(li.hasNext()) while(li.hasNext())
{ {
token = (Token) li.next(); word = (Word) li.next();
if (token instanceof Word)
if (!ll2.contains(word)) ll2.addLast(word);
}
n = ll2.size();
if (n==0) return null;
array = new Word[n];
li = ll2.listIterator();
while (li.hasNext())
{ {
array[n] = (Word) token; array[--n] = (Word) li.next();
n--;
} }
} }
return array; return array;
} }
public abstract void scanLine(String linea); public abstract void scanLine(String linea);
public abstract void scanBody(String linea); public abstract void scanBody(String linea);
public abstract void finishUp(); public abstract void finishUp();
public abstract DictionarySource getDictionarySource(); public abstract BitDictionarySource getDictionarySource();
public abstract String[] getDictionaryDescriptions(); public abstract String[] getDictionaryDescriptions();
} }

View file

@ -189,10 +189,12 @@ public class WindowScannerFilter implements WindowListener, FocusListener, Actio
mnuDicts = new CheckboxMenuItem("Dictionaries", false); mnuDicts = new CheckboxMenuItem("Dictionaries", false);
m.add(mnuDicts); m.add(mnuDicts);
mnuDicts.addItemListener(this); mnuDicts.addItemListener(this);
mb.add(m);
} }
else
{
m = new Menu("Help"); m = new Menu("Help");
if (!pocketpc)
{
for (int i = 0; i < DuffScannerPanel.keybdMgr.size(); i++) for (int i = 0; i < DuffScannerPanel.keybdMgr.size(); i++)
{ {
final JskadKeyboard kbd = DuffScannerPanel.keybdMgr.elementAt(i); final JskadKeyboard kbd = DuffScannerPanel.keybdMgr.elementAt(i);
@ -217,10 +219,10 @@ public class WindowScannerFilter implements WindowListener, FocusListener, Actio
} }
} }
m.add("-"); m.add("-");
}
mnuAbout = new MenuItem("About..."); mnuAbout = new MenuItem("About...");
m.add(mnuAbout); m.add(mnuAbout);
mnuAbout.addActionListener(this); mnuAbout.addActionListener(this);
}
mb.add(m); mb.add(m);
// disable menus // disable menus

View file

@ -78,6 +78,16 @@ public class Word extends Token
return def.toString(); return def.toString();
} }
/** Returns the preview text produced by this word's definitions
    (delegates to the getPreview method of the definitions object). */
public String getDefPreview()
{
    return this.def.getPreview();
}
/** Returns the token of this word, followed by " - " and the preview
    of its definitions as given by getDefPreview(). */
public String getWordDefPreview()
{
    String label = super.token + " - ";

    return label + getDefPreview();
}
public Definitions getDefs() public Definitions getDefs()
{ {
return def; return def;