From afd3a95a216774cdca31c5ac63e2a33c55fdc526 Mon Sep 17 00:00:00 2001
From: amontano
Date: Fri, 13 Aug 2004 04:47:35 +0000
Subject: [PATCH] Updated the dictionary structure to allow grouping of
dictionaries; this is the first step toward cleaning up the massive
repetitions in dictionaries.
---
source/org/thdl/tib/scanner/AcipToTab.java | 343 ++++++++++++++++++
.../thdl/tib/scanner/AppletScannerFilter.java | 16 +-
.../thdl/tib/scanner/BinaryFileGenerator.java | 262 +++++++++++--
.../thdl/tib/scanner/BitDictionarySource.java | 171 +++++++++
.../tib/scanner/ByteDictionarySource.java | 307 ++++++++++++++++
.../tib/scanner/CachedSyllableListTree.java | 23 +-
source/org/thdl/tib/scanner/Definitions.java | 68 ++--
.../thdl/tib/scanner/DictionarySource.java | 147 ++------
.../tib/scanner/DictionaryTableModel.java | 2 +-
.../tib/scanner/FileSyllableListTree.java | 134 +++++--
.../thdl/tib/scanner/LocalTibetanScanner.java | 6 +-
source/org/thdl/tib/scanner/Manipulate.java | 72 ++++
.../tib/scanner/MemorySyllableListTree.java | 8 +
.../thdl/tib/scanner/OnLineScannerFilter.java | 98 ++---
.../thdl/tib/scanner/RemoteScannerFilter.java | 5 +-
.../tib/scanner/RemoteTibetanScanner.java | 8 +-
source/org/thdl/tib/scanner/ScannerPanel.java | 6 +-
.../thdl/tib/scanner/SimpleScannerPanel.java | 56 ++-
.../thdl/tib/scanner/SyllableListTree.java | 2 +
.../org/thdl/tib/scanner/TibetanScanner.java | 72 +++-
.../thdl/tib/scanner/WindowScannerFilter.java | 14 +-
source/org/thdl/tib/scanner/Word.java | 10 +
22 files changed, 1533 insertions(+), 297 deletions(-)
create mode 100644 source/org/thdl/tib/scanner/AcipToTab.java
create mode 100644 source/org/thdl/tib/scanner/BitDictionarySource.java
create mode 100644 source/org/thdl/tib/scanner/ByteDictionarySource.java
diff --git a/source/org/thdl/tib/scanner/AcipToTab.java b/source/org/thdl/tib/scanner/AcipToTab.java
new file mode 100644
index 0000000..9f9130f
--- /dev/null
+++ b/source/org/thdl/tib/scanner/AcipToTab.java
@@ -0,0 +1,343 @@
+package org.thdl.tib.scanner;
+
+import java.net.*;
+import java.io.*;
+
+class AcipToTab
+{
+ private BufferedReader in;
+ private PrintWriter out;
+ private String currentDefiniendum, currentDefinition;
+
+ public AcipToTab(BufferedReader in, PrintWriter out)
+ {
+ this.in = in;
+ this.out = out;
+ }
+
+ public void add()
+ {
+ out.println(currentDefiniendum + '\t' + currentDefinition);
+ }
+
+ public static void main (String[] args) throws Exception
+ {
+ PrintWriter out;
+ BufferedReader in=null;
+ boolean file=false;
+
+ switch (args.length)
+ {
+ case 0: out = new PrintWriter(System.out);
+ in = new BufferedReader(new InputStreamReader(System.in));
+ break;
+ case 1: out = new PrintWriter(System.out);
+ file = true;
+ break;
+ default: out = new PrintWriter(new FileOutputStream(args[1]));
+ file = true;
+ }
+
+ if (file)
+ {
+ if (args[0].indexOf("http://") >= 0)
+ in = new BufferedReader(new InputStreamReader(new BufferedInputStream((new URL(args[0])).openStream())));
+ else
+ in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0])));
+ }
+
+ new AcipToTab(in, out).run();
+ }
+
+ public void run() throws Exception
+ {
+ final short newDefiniendum=1, halfDefiniendum=2, definition=3;
+ short status=newDefiniendum;
+ int marker, len, marker2, n=0, total=0, currentPage=0, currentLine=1, pos;
+ char ch;
+ String entrada="", currentLetter="", temp="", lastDefiniendum="", lastWeirdDefiniendum="";
+ boolean markerNotFound;
+ currentDefiniendum="";
+ currentDefinition="";
+ outAHere:
+ while (true)
+ {
+ entrada=in.readLine();
+ if (entrada==null) break;
+ currentLine++;
+
+ entrada = entrada.trim();
+ len = entrada.length();
+ if (len<=0) continue;
+
+ // get page number
+ if (entrada.charAt(0)=='@')
+ {
+ marker = 1;
+ while(marker0)
+ {
+ currentPage=Integer.parseInt(temp);
+ if (currentPage==3141)
+ {
+ System.out.println("Hello!");
+ }
+ }
+ if (marker0) n++;
+ lastDefiniendum=currentDefiniendum;
+ currentDefiniendum="";
+ currentDefinition="";
+ }
+
+ marker=marker2=1;
+ markerNotFound=true;
+
+ while (marker < len)
+ {
+ ch = entrada.charAt(marker);
+ switch(ch)
+ {
+ case '/':
+ markerNotFound=false;
+ marker2=marker+1;
+ break;
+ case '(': case '<':
+ markerNotFound=false;
+ marker2=marker;
+ break;
+ case 'g': // verify "g "
+ if (marker+10 && Manipulate.isVowel(entrada.charAt(pos-1)) && (markerNotFound || entrada.substring(0,pos+1).length() < entrada.substring(0, marker).trim().length()))
+ {
+ // out.println(currentPage + ": " + entrada);
+ n++;
+ }*/
+
+ /* either this is a definiendum that consists of several lines or
+ it is part of the last definition. */
+ if (markerNotFound)
+ {
+ /* assume that the definiendum goes on to the next line. */
+ currentDefiniendum = currentDefiniendum + " ";
+ status=halfDefiniendum;
+ }
+ else
+ {
+ // total++;
+
+ currentDefiniendum = currentDefiniendum + entrada.substring(0,marker).trim();
+ currentDefinition = "[" + currentPage + "] " + entrada.substring(marker2).trim();
+
+ status=definition;
+
+ while (true)
+ {
+ entrada=in.readLine();
+
+ if (entrada==null)
+ {
+ // add here
+ add();
+
+ // if (new TibetanString(lastDefiniendum).compareTo(new TibetanString(currentDefiniendum))>0) n++;
+ break outAHere;
+ }
+
+ currentLine++;
+ entrada = entrada.trim();
+
+ if (entrada.equals("")) break;
+ else
+ {
+ currentDefinition = currentDefinition + " " + entrada;
+ }
+ }
+
+ }
+ }
+ else // last line did not start with the current letter, it must still be part of the definition
+ {
+ currentDefinition = currentDefinition + " " + entrada;
+ while (true)
+ {
+ entrada=in.readLine();
+
+ if (entrada==null)
+ {
+ // add here
+ add();
+
+ // if (new TibetanString(lastDefiniendum).compareTo(new TibetanString(currentDefiniendum))>0) n++;
+ break outAHere;
+ }
+
+ currentLine++;
+ entrada = entrada.trim();
+
+ if (entrada.equals("")) break;
+ {
+ currentDefinition = currentDefinition + " " + entrada;
+ }
+ }
+ }
+
+ } else // if first character was not a letter, it must still be part of definition
+ {
+ currentDefinition = currentDefinition + " " + entrada;
+ while (true)
+ {
+ entrada=in.readLine();
+
+ if (entrada==null)
+ {
+ // add here
+ add();
+
+ break outAHere;
+ }
+
+ currentLine++;
+ entrada = entrada.trim();
+
+ if (entrada.equals("")) break;
+ else
+ {
+ currentDefinition = currentDefinition + " " + entrada;
+ }
+ }
+ }
+ }
+// out.println(n + " / " + total);
+ out.flush();
+ }
+}
\ No newline at end of file
diff --git a/source/org/thdl/tib/scanner/AppletScannerFilter.java b/source/org/thdl/tib/scanner/AppletScannerFilter.java
index a6dff66..6342e53 100644
--- a/source/org/thdl/tib/scanner/AppletScannerFilter.java
+++ b/source/org/thdl/tib/scanner/AppletScannerFilter.java
@@ -70,16 +70,13 @@ public class AppletScannerFilter extends JApplet implements ActionListener, Focu
}
diagAbout = null;
-
+
// sp = new SimpleScannerPanel(url);
- sp = new DuffScannerPanel(url);
-
+ sp = new DuffScannerPanel(url);
sp.addFocusListener(this);
-
setContentPane(sp);
// setup the menu. Almost identical to WindowScannerFilter, but using swing.
-
JMenuBar mb = new JMenuBar();
mnuEdit = new JMenu ("Edit");
mnuCut = new JMenuItem("Cut");
@@ -107,13 +104,14 @@ public class AppletScannerFilter extends JApplet implements ActionListener, Focu
mnuClear.addActionListener(this);
mb.add(mnuEdit);
- JMenu m = new JMenu("View");
+ JMenu m;
+
+ /* m = new JMenu("View");
tibScript = new JCheckBoxMenuItem("Tibetan Script", true);
m.add(tibScript);
tibScript.addItemListener(this);
- mb.add(m);
+ mb.add(m);*/
- //JMenuItem
aboutItem = new JMenuItem("About...");
aboutItem.addActionListener(this);
@@ -128,7 +126,7 @@ public class AppletScannerFilter extends JApplet implements ActionListener, Focu
SymComponent aSymComponent = new SymComponent();
this.addComponentListener(aSymComponent);
//}}
-
+
fakeFrame = new Frame();
if (!ThdlOptions.getBooleanOption(AboutDialog.windowAboutOption))
{
diff --git a/source/org/thdl/tib/scanner/BinaryFileGenerator.java b/source/org/thdl/tib/scanner/BinaryFileGenerator.java
index c147499..30aad8a 100644
--- a/source/org/thdl/tib/scanner/BinaryFileGenerator.java
+++ b/source/org/thdl/tib/scanner/BinaryFileGenerator.java
@@ -150,6 +150,8 @@ myglossary_uma.txt in the transliteration format explained above.
*/
public class BinaryFileGenerator extends SimplifiedLinkedList
{
+ private static final int versionNumber = 3;
+
private long posHijos;
private String sil, def[];
public final static int delimiterGeneric=0;
@@ -158,7 +160,7 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
/** Number of dictionary. If 0, partial word (no definition).
*/
- private DictionarySource sourceDef;
+ private ByteDictionarySource sourceDef;
public static RandomAccessFile wordRaf;
private static RandomAccessFile defRaf;
@@ -181,14 +183,16 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
{
super();
int marker = sil.indexOf(" ");
- this.sourceDef = new DictionarySource();
+
+ // fix for updates
+ this.sourceDef = new ByteDictionarySource();
if (marker<0)
{
this.sil = sil;
this.def = new String[1];
this.def[0] = def;
- this.sourceDef.add(numDef);
+ this.sourceDef.addNewDef(numDef);
}
else
{
@@ -208,9 +212,7 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
{
final short newDefiniendum=1, halfDefiniendum=2, definition=3;
short status=newDefiniendum;
- int marker, len, marker2;
-// int n=0;
- int currentPage=0, currentLine=1;
+ int marker, len, marker2, currentPage=0, currentLine=1;
char ch;
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(archivo)));
String entrada="", s1="", s2="", currentLetter="", temp="", lastWeirdDefiniendum="", alternateWords[];
@@ -495,6 +497,11 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length()).trim());
if (!s2.equals(""))
{
+ if (currentLine%5000==0)
+ {
+ System.out.println("Adding " + s1 + "...");
+ System.out.flush();
+ }
marker2 = s1.indexOf(';');
if (marker2>0)
{
@@ -564,32 +571,234 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
}
}
}
+
+ private void reGroup (int n)
+ {
+ int i, pos, posEnd;
+
+ for (i=0; i=def[n].length())
+ {
+ pos = def[i].indexOf(def[n]);
+
+ // if it is the same String exactly
+ if (pos==0 && def[i].length()==def[n].length())
+ {
+ if (i0 && !Character.isLetter(def[i].charAt(pos-1)))) && (posEnd==def[i].length() || !Character.isLetter(def[i].charAt(posEnd))))
+ {
+ if(sourceDef.getDef(i).contains(sourceDef.getDef(n)))
+ {
+ def = Manipulate.deleteString(def, n);
+ sourceDef.deleteDef(n);
+ return;
+ }
+
+ // else
+ sourceDef.addDictToDef(sourceDef.getDef(i), n);
+
+ do
+ {
+ def[i] = Manipulate.replace(def[i], pos, posEnd, "*");
+ pos = def[i].indexOf(def[n]);
+ posEnd = pos + def[n].length();
+ } while ((pos==0 || (pos>0 && !Character.isLetter(def[i].charAt(pos-1)))) && (posEnd==def[i].length() || !Character.isLetter(def[i].charAt(posEnd))));
+
+ if (i0 && !Character.isLetter(def[n].charAt(pos-1)))) && (posEnd==def[n].length() || !Character.isLetter(def[n].charAt(posEnd))))
+ {
+ if (sourceDef.getDef(n).contains(sourceDef.getDef(i)))
+ {
+ def = Manipulate.deleteString(def, i);
+ sourceDef.deleteDef(i);
+ i--;
+ continue;
+ }
+
+ sourceDef.addDictToDef(sourceDef.getDef(n), i);
+
+ do
+ {
+ def[n] = Manipulate.replace(def[n], pos, posEnd, "*");
+ pos = def[n].indexOf(def[i]);
+ posEnd = pos + def[i].length();
+ } while ((pos==0 || (pos>0 && !Character.isLetter(def[n].charAt(pos-1)))) && (posEnd==def[n].length() || !Character.isLetter(def[n].charAt(posEnd))));
+
+ i=-1; // start over
+ continue;
+ }
+ }
+
+ // deal with repetition of dictionaries
+
+ if (sourceDef.getDef(i).equals(sourceDef.getDef(n)))
+ {
+ if (i=def.length())
+ {
+ pos = this.def[i].indexOf(def);
+ posEnd = pos + def.length();
+ if ((pos==0 || (pos>0 && !Character.isLetter(this.def[i].charAt(pos-1)))) && (posEnd==this.def[i].length() || !Character.isLetter(this.def[i].charAt(posEnd))))
+ {
+ if (!sourceDef.isDictInDef(numDef, i))
+ {
+ if (this.def[i].length()>def.length())
+ {
+ //temp = Manipulate.deleteSubstring(this.def[i], pos, posEnd);
+ temp = this.def[i];
+ do
+ {
+ temp = Manipulate.replace(temp, pos, posEnd, "*");
+ pos = temp.indexOf(def);
+ posEnd = pos + def.length();
+ } while ((pos==0 || (pos>0 && !Character.isLetter(temp.charAt(pos-1)))) && (posEnd==temp.length() || !Character.isLetter(temp.charAt(posEnd))));
+
+ this.def[i] = def;
+ this.def = Manipulate.addString(this.def, temp, i+1);
+ sourceDef.dubDef(i);
+ sourceDef.addDictToDef(numDef, i);
+
+ reGroup(i);
+ if (i+10 && !Character.isLetter(def.charAt(pos-1)))) && (posEnd==def.length() || !Character.isLetter(def.charAt(posEnd))))
+ {
+ if (sourceDef.isDictInDefAlone(numDef, i))
+ {
+ this.def[i] = def;
+ reGroup(i);
+ }
+ else
+ {
+ sourceDef.addDictToDef(numDef, i);
+ do
+ {
+ //def = Manipulate.deleteSubstring(def, pos, posEnd);
+ def = Manipulate.replace(def, pos, posEnd, "*");
+ pos = def.indexOf(this.def[i]);
+ posEnd = pos + this.def[i].length();
+ } while ((pos==0 || (pos>0 && !Character.isLetter(def.charAt(pos-1)))) && (posEnd==def.length() || !Character.isLetter(def.charAt(posEnd))));
+ }
+ changed = true;
+ }
+ }
+ i++;
+ }
+ } while (changed);
+
+ if (notAlreadyThere)
+ {
+ // check if it is a duplicate for the same dictionary.
+ i = sourceDef.containsAlone(numDef);
+ if (i>-1)
+ {
+ this.def[i] = this.def[i] + ". " + def;
+ reGroup(i);
+ }
+ else
+ {
+ this.def = Manipulate.addString(this.def, def, this.def.length);
+ sourceDef.addNewDef(numDef);
+ reGroup(this.def.length-1);
}
- newDef[i] = def;
- this.def = newDef;
- sourceDef.add(numDef);
}
}
}
@@ -617,8 +826,8 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
{
try
{
- wordRaf.writeInt((int)defRaf.getFilePointer());
- defRaf.writeUTF(def[i]);
+ wordRaf.writeInt((int)defRaf.getFilePointer());
+ defRaf.writeUTF(def[i]);
}
catch (Exception e)
{
@@ -681,6 +890,12 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
print();
wordRaf.writeInt((int)posHijos);
+ // write version marker
+ wordRaf.writeShort(-1);
+ wordRaf.writeByte(-1);
+
+ // write version number
+ wordRaf.writeByte(versionNumber);
}
public static void main(String args[]) throws Exception
@@ -754,11 +969,14 @@ public class BinaryFileGenerator extends SimplifiedLinkedList
{
delimiterType=delimiterDash;
}
+ System.out.println("\nProcessing " + args[i] + "...");
sl.addFile(args[i] + ".txt", delimiterType, delimiter, n);
n++; i++;
}
}
}
+ System.out.println("Writing to file " + args[a] + "...");
+ System.out.flush();
sl.generateDatabase(args[a]);
}
}
diff --git a/source/org/thdl/tib/scanner/BitDictionarySource.java b/source/org/thdl/tib/scanner/BitDictionarySource.java
new file mode 100644
index 0000000..7725d33
--- /dev/null
+++ b/source/org/thdl/tib/scanner/BitDictionarySource.java
@@ -0,0 +1,171 @@
+/*
+The contents of this file are subject to the AMP Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the AMP web site
+(http://www.tibet.iteso.mx/Guatemala/).
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific terms governing rights and limitations under the
+License.
+
+The Initial Developer of this software is Andres Montano Pellegrini. Portions
+created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
+Pellegrini. All Rights Reserved.
+
+Contributor(s): ______________________________________.
+*/
+package org.thdl.tib.scanner;
+
+import java.io.*;
+
+/** Specifies a subset of dictionaries among a set of
+ dictionaries. Supports a maximum of 30 dictionaries.
+
+ @author Andrés Montano Pellegrini
+*/
+public class BitDictionarySource extends DictionarySource
+{
+ private int dicts;
+
+ /** Last bit of word; 1 if there are more brothers.*/
+ private static final int lastBit=1073741824;
+ private static final int allDicts=lastBit-1;
+
+ public BitDictionarySource()
+ {
+ dicts = 0;
+ }
+
+ public BitDictionarySource(int dicts)
+ {
+ this.dicts = dicts;
+ }
+
+ public boolean equals(Object obj)
+ {
+ BitDictionarySource objB = (BitDictionarySource) obj;
+ return (this.getDicts()==objB.getDicts());
+ }
+
+ /** Returns an instance of DictionarySource with all dictionaries selected */
+ public BitDictionarySource getAllDictionaries()
+ {
+ BitDictionarySource ds = new BitDictionarySource();
+ ds.setDicts(allDicts);
+ return ds;
+ }
+
+ /** Marks all dictionaries as selected */
+ public void setAllDictionaries()
+ {
+ dicts = allDicts;
+ }
+
+ /** Assumes dicts is an array of bits, and selects the dictionaries marked by
+ each bit. */
+ public void setDicts(int dicts)
+ {
+ this.dicts = dicts;
+ }
+
+ /** Returns an array of bits representing the selected dictionaries. */
+ public int getDicts()
+ {
+ return dicts;
+ }
+
+ private int getBits(int n)
+ {
+ return 1 << n;
+ }
+
+ public boolean contains(int dict)
+ {
+ return (dicts & getBits(dict))>0;
+ }
+
+ public boolean contains(BitDictionarySource dicts)
+ {
+ return this.intersection(dicts).equals(dicts);
+ }
+
+ /** Marks the dictionary "dict" as selected */
+ public void add(int dict)
+ {
+ dicts|= getBits(dict);
+ }
+
+ public void add(BitDictionarySource dicts)
+ {
+ this.dicts|= dicts.dicts;
+ }
+
+ /** Write to file using BinaryFileGenerator */
+ public void print(boolean hasNext, DataOutput raf) throws IOException
+ {
+ int numDict;
+ if (hasNext) numDict = lastBit | dicts;
+ else numDict = dicts;
+ raf.writeInt(numDict);
+ }
+
+ public void read(DataInput raf) throws IOException
+ {
+ setDicts(raf.readInt());
+ }
+
+ public boolean hasBrothers()
+ {
+ return (dicts & lastBit)>0;
+ }
+
+ public int countDefs()
+ {
+ int n, source;
+ for (n=0, source = dicts & allDicts; source>0; source>>=1)
+ if (source%2==1) n++;
+ return n;
+ }
+
+ public DictionarySource intersection(DictionarySource dsO)
+ {
+ BitDictionarySource ds = new BitDictionarySource(), dsOB = (BitDictionarySource) dsO;
+ ds.setDicts(this.dicts & dsOB.dicts);
+ return ds;
+ }
+
+ /** Returns an array containing the indexes for the available dictionaries. Use this
+ method when you know exactly how many dictionaries there are! */
+ public int[] untangleDefs(int n)
+ {
+ int arr[], i, pos, source;
+ arr = new int[n];
+ for (i=0, pos=0, source=dicts & allDicts; pos>=1)
+ if (source%2==1)
+ arr[pos++]=i;
+ return arr;
+ }
+
+ /** Returns an array containing the indexes for the available dictionaries.*/
+ public int[] untangleDefs()
+ {
+ return untangleDefs(countDefs());
+ }
+
+ public boolean isEmpty()
+ {
+ return (dicts & allDicts)<=0;
+ }
+
+ public void reset()
+ {
+ dicts = 0;
+ }
+ public String getTag(int i)
+ {
+ int source[] = this.untangleDefs();
+ if (defTags==null) return Integer.toString(source[i]+1);
+ return defTags[source[i]];
+ }
+}
\ No newline at end of file
diff --git a/source/org/thdl/tib/scanner/ByteDictionarySource.java b/source/org/thdl/tib/scanner/ByteDictionarySource.java
new file mode 100644
index 0000000..94fc31d
--- /dev/null
+++ b/source/org/thdl/tib/scanner/ByteDictionarySource.java
@@ -0,0 +1,307 @@
+/*
+The contents of this file are subject to the AMP Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the AMP web site
+(http://www.tibet.iteso.mx/Guatemala/).
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific terms governing rights and limitations under the
+License.
+
+The Initial Developer of this software is Andres Montano Pellegrini. Portions
+created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
+Pellegrini. All Rights Reserved.
+
+Contributor(s): ______________________________________.
+*/
+package org.thdl.tib.scanner;
+
+import java.io.*;
+
+/** Specifies a subset of dictionaries among a set of
+ dictionaries. Supports a maximum of 30 dictionaries.
+ Unlike {@link BitDictionarySource}, it provides the infrastructure
+ to group definitions from various dictionaries.
+
+ @author Andrés Montano Pellegrini
+*/
+public class ByteDictionarySource extends DictionarySource
+{
+ //private BitDictionarySource dicts[];
+ private BitDictionarySource dicts[];
+ private boolean hasBrother;
+
+ /** Last bit of word; 1 if there are more brothers.*/
+ private static final int lastBit = 64;
+ private static final int allDicts=lastBit-1;
+
+ public ByteDictionarySource()
+ {
+ dicts = null;
+ hasBrother = false;
+ }
+
+ public ByteDictionarySource(BitDictionarySource dicts[], boolean hasBrother)
+ {
+ this.dicts = dicts;
+ this.hasBrother = hasBrother;
+ }
+
+ public void insertDef(BitDictionarySource newDef, int n)
+ {
+ int i;
+ BitDictionarySource newDicts[] = new BitDictionarySource[dicts.length+1];
+
+ for (i=0; i0)
+ {
+ hasBrother = true;
+ n = n & allDicts;
+ }
+ else hasBrother = false;
+
+ if (n==0)
+ {
+ dicts = null;
+ return;
+ }
+
+ dicts = new BitDictionarySource[n];
+
+ for (i=0; i< dicts.length; i++)
+ {
+ dicts[i] = new BitDictionarySource();
+ do
+ {
+ n = (int) raf.readByte();
+ dicts[i].add(n & allDicts);
+ } while((n & lastBit)>0);
+ }
+ }
+
+ public boolean hasBrothers()
+ {
+ return this.hasBrother;
+ }
+
+ public boolean contains(int dict)
+ {
+ int i;
+
+ if (dicts==null) return false;
+
+ for (i=0; i0;
- }
-
- public void add(int dict)
- {
- dicts|= getBits(dict);
- }
-
- /** Write to file using BinaryFileGenerator */
- public void print(boolean hasNext, DataOutput raf) throws IOException
- {
- int numDict;
- if (hasNext) numDict = lastBit | dicts;
- else numDict = dicts;
- raf.writeInt(numDict);
- }
-
- public static DictionarySource read(DataInput raf) throws IOException
- {
- DictionarySource ds = new DictionarySource();
- ds.setDicts(raf.readInt());
- return ds;
- }
-
- public boolean hasBrothers()
- {
- return (dicts & lastBit)>0;
- }
-
- public int countDefs()
- {
- int n, source;
- for (n=0, source = dicts & allDicts; source>0; source>>=1)
- if (source%2==1) n++;
- return n;
- }
-
- public DictionarySource intersection(DictionarySource dsO)
- {
- DictionarySource ds = new DictionarySource();
- ds.setDicts(this.dicts & dsO.dicts);
- return ds;
- }
-
- public int[] untangleDefs(int n)
- {
- int arr[], i, pos, source;
- arr = new int[n];
- for (i=0, pos=0, source=dicts & allDicts; pos>=1)
- if (source%2==1)
- arr[pos++]=i;
- return arr;
- }
-
- public int[] untangleDefs()
- {
- return untangleDefs(countDefs());
- }
-
- public boolean isEmpty()
- {
- return (dicts & allDicts)<=0;
- }
-
- public void reset()
- {
- dicts = 0;
- }
-}
+ /** Writes the dictionary information to a random access file. */
+ public abstract void print(boolean hasNext, DataOutput raf) throws IOException;
+
+ /** Reads the dictionary information from a random access file, according
+ to the way it was written with {@link #print}. */
+ public abstract void read(DataInput raf) throws IOException;
+
+ /** Returns the number of definitions available. */
+ public abstract int countDefs();
+
+ /** Returns true if the node has brothers. This is used by {@link FileSyllableListTree}. */
+ public abstract boolean hasBrothers();
+
+ /** Returns true if dict is a selected dictionary. */
+ public abstract boolean contains(int dict);
+}
\ No newline at end of file
diff --git a/source/org/thdl/tib/scanner/DictionaryTableModel.java b/source/org/thdl/tib/scanner/DictionaryTableModel.java
index 18f2b6f..12e1596 100644
--- a/source/org/thdl/tib/scanner/DictionaryTableModel.java
+++ b/source/org/thdl/tib/scanner/DictionaryTableModel.java
@@ -64,7 +64,7 @@ public class DictionaryTableModel extends AbstractTableModel
case 0:
if (tibetanActivated) return arrayTibetan[row];
else return array[row].getWylie();
- case 1: return array[row].getDef();
+ case 1: return array[row].getDefPreview();
default: return array[row].toString();
}
}
diff --git a/source/org/thdl/tib/scanner/FileSyllableListTree.java b/source/org/thdl/tib/scanner/FileSyllableListTree.java
index 5b72071..98013ef 100644
--- a/source/org/thdl/tib/scanner/FileSyllableListTree.java
+++ b/source/org/thdl/tib/scanner/FileSyllableListTree.java
@@ -38,20 +38,24 @@ public class FileSyllableListTree implements SyllableListTree
private long def[];
private long posLista;
private DictionarySource defSource;
- public static DictionarySource defSourcesWanted;
+ public static BitDictionarySource defSourcesWanted;
public static RandomAccessFile wordRaf=null;
private static RandomAccessFile defRaf=null;
+ public static int versionNumber;
/** Creates the root */
public FileSyllableListTree(String archivo, int defSourcesWanted) throws Exception
{
sil = null;
def = null;
- this.defSource = new DictionarySource();
- openFiles(archivo);
- posLista = wordRaf.length() - 4;
- wordRaf.seek(posLista);
- posLista = (long)wordRaf.readInt();
+ defSource = null;
+
+ this.openFiles(archivo);
+ posLista = this.wordRaf.getFilePointer();
+
+ /* if versionNumber is 2 use BitDictionarySource
+ else use ByteDictionarySource. */
+ this.defSourcesWanted.setDicts(defSourcesWanted);
}
/** Used to create each node (except the root)
@@ -73,12 +77,61 @@ public class FileSyllableListTree implements SyllableListTree
{
return defSource;
}
+
+ public BitDictionarySource getDictionarySourcesWanted()
+ {
+ return this.defSourcesWanted;
+ }
public static void openFiles(String archivo) throws Exception
{
+ long fileSize;
+ int pos;
+
wordRaf = new RandomAccessFile(archivo + ".wrd", "r");
defRaf = new RandomAccessFile(archivo + ".def", "r");
- defSourcesWanted = DictionarySource.getAllDictionaries();
+
+ fileSize = wordRaf.length();
+ wordRaf.seek(fileSize-4L);
+ pos = wordRaf.readInt();
+
+ if (pos >> 8 == -1)
+ {
+ versionNumber = pos & 255;
+
+ // for now, only version 2 & 3 should be expected
+ if (versionNumber != 3) versionNumber=2;
+ wordRaf.seek(fileSize-8L);
+ pos = wordRaf.readInt();
+ }
+ else
+ {
+ // Updates the dictionary for backward compatibility.
+ try
+ {
+ wordRaf.close();
+ wordRaf = new RandomAccessFile(archivo + ".wrd", "rw");
+ wordRaf.seek(fileSize);
+ wordRaf.writeShort(-1);
+ wordRaf.writeByte(-1);
+
+ // Because it didn't have a version number, must be version 2.
+ versionNumber = 2;
+ wordRaf.writeByte(versionNumber);
+ wordRaf.close();
+ wordRaf = new RandomAccessFile(archivo + ".wrd", "r");
+ }
+ catch (Exception e)
+ {
+ // dictionary is stored on non-writable media. Do nothing.
+ }
+ }
+
+ /* if versionNumber is 2 use BitDictionarySource else use
+ ByteDictionarySource. */
+ defSourcesWanted = new BitDictionarySource();
+
+ wordRaf.seek(pos);
}
public String getDef()
@@ -89,28 +142,54 @@ public class FileSyllableListTree implements SyllableListTree
public Definitions getDefs()
{
if (def==null) return null;
- DictionarySource defSourceAvail = defSource.intersection(defSourcesWanted);
-
- int defsAvail[] = defSourceAvail.untangleDefs(), defsFound[] = defSource.untangleDefs(def.length);
-
-
- String defs[] = new String[defsAvail.length];
+ DictionarySource defSourceAvail = defSource.intersection(defSourcesWanted);
+ String defs[];
int i, n=0;
- try
+
+ if (versionNumber==2)
{
- for (i=0; i" + dictionaries[i] + " (" + Definitions.defTags[i] + ") ");
+ out.print(">" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ") ");
else
- out.print(">" + Definitions.defTags[i] + " ");
+ out.print(">" + DictionarySource.defTags[i] + " ");
// out.println(" + "");
}
// out.println(" ");
}
- else ds = DictionarySource.getAllDictionaries();
+ // fix for updates
+ else ds = new BitDictionarySource().getAllDictionaries();
// out.println("");
out.println("
");
out.println("");
@@ -300,36 +300,40 @@ public class OnLineScannerFilter extends HttpServlet
init = fin+1;
} */
+ scanner.clearTokens();
scanner.scanBody(in);
scanner.finishUp();
- words = scanner.getTokenArray();
- printText(pw, words, tibetan);
- printAllDefs(pw, words, tibetan);
+ printText(pw, tibetan);
+ printAllDefs(pw, tibetan);
scanner.clearTokens();
}
}
- public void printText(PrintWriter pw, Object words[], boolean tibetan)
+ public void printText(PrintWriter pw, boolean tibetan)
{
- Token token;
+ Token words[] = scanner.getTokenArray();
Word word;
char pm;
int i;
+ if (words==null) return;
+
pw.print("");
for (i=0; i < words.length; i++)
{
- token = (Token) words[i];
- if (token instanceof Word)
+
+ if (words[i] instanceof Word)
{
- word = (Word) token;
- pw.print(word.getLink());
+ word = (Word) words[i];
+ if (word.getDefs().getDictionarySource()!=null)
+ pw.print(word.getLink());
+ else pw.print(word.getWylie() + " ");
}
else
{
- if (token instanceof PunctuationMark)
+ if (words[i] instanceof PunctuationMark)
{
- pm = token.toString().charAt(0);
+ pm = words[i].toString().charAt(0);
switch (pm)
{
case '\n':
@@ -352,53 +356,49 @@ public class OnLineScannerFilter extends HttpServlet
pw.println("
");
}
- public void printAllDefs(PrintWriter pw, Object words[], boolean tibetan)
+ public void printAllDefs(PrintWriter pw, boolean tibetan)
{
- SimplifiedLinkedList temp = new SimplifiedLinkedList();
- int i;
- Word word;
+ int i, j;
+ Word words[];
Definitions defs;
-
- for (i=words.length-1; i >= 0; i--)
- {
- if (words[i] instanceof Word)
- {
- if (!temp.contains(words[i]))
- {
- temp.addLast(words[i]);
- }
- }
- }
-
- SimplifiedListIterator li = temp.listIterator();
String tag;
+ DictionarySource ds;
+
+ words = scanner.getWordArray(false);
+
+ if (words == null) return;
+
pw.println("");
- while (li.hasNext())
+
+ for (j=0; j");
- tag = defs.getTag(0);
- if (tag!=null)
- {
- pw.println(" "+ word.getBookmark(tibetan) +" | ");
- pw.println(" "+ tag +" | ");
- pw.println(" " + defs.def[0] + " | ");
- }
+ tag = ds.getTag(0);
+ // else tag = null;
+ /*if (tag!=null)
+ {*/
+ pw.println(" "+ words[j].getBookmark(tibetan) +" | ");
+ pw.println(" "+ tag +" | ");
+ pw.println(" " + defs.def[0] + " | ");
+ /*}
else
{
- pw.println(" "+ word.getBookmark(tibetan) +" | ");
+ pw.println(" "+ words[j].getBookmark(tibetan) +" | ");
pw.println(" " + defs.def[0] + " | ");
- }
+ }*/
pw.println(" ");
for (i=1; i");
- tag = defs.getTag(i);
+ if (ds!=null) tag = ds.getTag(i);
+ else tag = null;
if (tag!=null)
{
- pw.println(" "+ tag +" | ");
- pw.println(" " + defs.def[i] + " | ");
+ pw.println(" "+ tag +" | ");
+ pw.println(" " + defs.def[i] + " | ");
}
else pw.println(" " + defs.def[i] + " | ");
pw.println(" ");
diff --git a/source/org/thdl/tib/scanner/RemoteScannerFilter.java b/source/org/thdl/tib/scanner/RemoteScannerFilter.java
index 8824f32..b5fa9f3 100644
--- a/source/org/thdl/tib/scanner/RemoteScannerFilter.java
+++ b/source/org/thdl/tib/scanner/RemoteScannerFilter.java
@@ -35,7 +35,7 @@ import javax.servlet.http.*;
public class RemoteScannerFilter extends GenericServlet
{
private TibetanScanner scanner;
- private DictionarySource ds;
+ private BitDictionarySource ds;
public RemoteScannerFilter() throws Exception
{
@@ -53,6 +53,7 @@ public class RemoteScannerFilter extends GenericServlet
PrintWriter out = res.getWriter();
int i;
String linea, dicts = req.getParameter("dicts"), dicDescrip[];
+
if (dicts!=null)
{
if (dicts.equals("names"))
@@ -66,7 +67,7 @@ public class RemoteScannerFilter extends GenericServlet
for (i=0; ilenPreview) preview = preview.substring(0,lenPreview);
listDefs.add(preview);
}
diff --git a/source/org/thdl/tib/scanner/SyllableListTree.java b/source/org/thdl/tib/scanner/SyllableListTree.java
index 15101c6..bf2ac9b 100644
--- a/source/org/thdl/tib/scanner/SyllableListTree.java
+++ b/source/org/thdl/tib/scanner/SyllableListTree.java
@@ -39,5 +39,7 @@ public interface SyllableListTree
public Definitions getDefs();
public boolean hasDef();
public SyllableListTree lookUp(String silStr);
+
public DictionarySource getDictionarySource();
+ public BitDictionarySource getDictionarySourcesWanted();
}
\ No newline at end of file
diff --git a/source/org/thdl/tib/scanner/TibetanScanner.java b/source/org/thdl/tib/scanner/TibetanScanner.java
index f447118..a75395b 100644
--- a/source/org/thdl/tib/scanner/TibetanScanner.java
+++ b/source/org/thdl/tib/scanner/TibetanScanner.java
@@ -25,7 +25,7 @@ import org.thdl.util.*;
*/
public abstract class TibetanScanner
{
- public static final String version = "The Tibetan to English Translation Tool, version 2.2.2 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
+ public static final String version = "The Tibetan to English Translation Tool, version 3.0.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". ";
public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2004 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved.";
public static final String copyrightASCII="Copyright 2000-2004 by Andres Montano Pellegrini, all rights reserved.";
public static final String copyrightHTML="
" + version + "Copyright © 2000-2004 by Andrés Montano Pellegrini.
All rights reserved.";
@@ -293,6 +293,7 @@ public abstract class TibetanScanner
public Token[] getTokenArray()
{
int n=wordList.size();
+ if (n==0) return null;
Token token[] = new Token[n];
SimplifiedListIterator li = wordList.listIterator();
while(li.hasNext())
@@ -306,35 +307,76 @@ public abstract class TibetanScanner
}
public Word[] getWordArray()
+ {
+ return getWordArray(true);
+ }
+
+ public Word[] getWordArray(boolean includeRepeated)
{
Token token;
- Word array[];
+ Word array[], word;
int n=0;
SimplifiedListIterator li = wordList.listIterator();
- while(li.hasNext())
- if (li.next() instanceof Word) n++;
-
- if (n==0) return null;
-
- array = new Word[n];
- n--;
- li = wordList.listIterator();
+ SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList();
+
while(li.hasNext())
{
token = (Token) li.next();
- if (token instanceof Word)
- {
- array[n] = (Word) token;
- n--;
+
+ if (token instanceof Word)
+ {
+ ll.addLast(token);
}
}
+ if (includeRepeated)
+ {
+ n = ll.size();
+
+ if (n==0) return null;
+
+ array = new Word[n];
+ li = ll.listIterator();
+
+ n=0;
+ while (li.hasNext())
+ {
+ array[n++] = (Word) li.next();
+ }
+ }
+ else
+ {
+ ll2 = new SimplifiedLinkedList();
+ li = ll.listIterator();
+
+ while(li.hasNext())
+ {
+ word = (Word) li.next();
+
+ if (!ll2.contains(word)) ll2.addLast(word);
+ }
+
+ n = ll2.size();
+
+ if (n==0) return null;
+
+ array = new Word[n];
+ li = ll2.listIterator();
+
+ while (li.hasNext())
+ {
+ array[--n] = (Word) li.next();
+ }
+ }
+
+
+
return array;
}
public abstract void scanLine(String linea);
public abstract void scanBody(String linea);
public abstract void finishUp();
- public abstract DictionarySource getDictionarySource();
+ public abstract BitDictionarySource getDictionarySource();
public abstract String[] getDictionaryDescriptions();
}
diff --git a/source/org/thdl/tib/scanner/WindowScannerFilter.java b/source/org/thdl/tib/scanner/WindowScannerFilter.java
index b7ff1b7..4a20817 100644
--- a/source/org/thdl/tib/scanner/WindowScannerFilter.java
+++ b/source/org/thdl/tib/scanner/WindowScannerFilter.java
@@ -189,10 +189,12 @@ public class WindowScannerFilter implements WindowListener, FocusListener, Actio
mnuDicts = new CheckboxMenuItem("Dictionaries", false);
m.add(mnuDicts);
mnuDicts.addItemListener(this);
+ mb.add(m);
}
- else
+
+ m = new Menu("Help");
+ if (!pocketpc)
{
- m = new Menu("Help");
for (int i = 0; i < DuffScannerPanel.keybdMgr.size(); i++)
{
final JskadKeyboard kbd = DuffScannerPanel.keybdMgr.elementAt(i);
@@ -217,10 +219,10 @@ public class WindowScannerFilter implements WindowListener, FocusListener, Actio
}
}
m.add("-");
- mnuAbout = new MenuItem("About...");
- m.add(mnuAbout);
- mnuAbout.addActionListener(this);
- }
+ }
+ mnuAbout = new MenuItem("About...");
+ m.add(mnuAbout);
+ mnuAbout.addActionListener(this);
mb.add(m);
// disable menus
diff --git a/source/org/thdl/tib/scanner/Word.java b/source/org/thdl/tib/scanner/Word.java
index b8821d1..c1384d1 100644
--- a/source/org/thdl/tib/scanner/Word.java
+++ b/source/org/thdl/tib/scanner/Word.java
@@ -76,6 +76,16 @@ public class Word extends Token
public String getDef()
{
return def.toString();
+ }
+
+ public String getDefPreview()
+ {
+ return def.getPreview();
+ }
+
+ public String getWordDefPreview()
+ {
+ return super.token + " - " + getDefPreview();
}
public Definitions getDefs()