From a86bad152f926139b5818df902cd60df345d4222 Mon Sep 17 00:00:00 2001 From: amontano Date: Mon, 28 Feb 2005 03:22:10 +0000 Subject: [PATCH] These classes were for private use for processing some dictionaries. They are out of place here. --- .../thdl/tib/scanner/DictionaryBreakDown.java | 440 ------------------ .../thdl/tib/scanner/SortingTibetanEntry.java | 26 -- 2 files changed, 466 deletions(-) delete mode 100644 source/org/thdl/tib/scanner/DictionaryBreakDown.java delete mode 100644 source/org/thdl/tib/scanner/SortingTibetanEntry.java diff --git a/source/org/thdl/tib/scanner/DictionaryBreakDown.java b/source/org/thdl/tib/scanner/DictionaryBreakDown.java deleted file mode 100644 index e9525cd..0000000 --- a/source/org/thdl/tib/scanner/DictionaryBreakDown.java +++ /dev/null @@ -1,440 +0,0 @@ -/* -The contents of this file are subject to the AMP Open Community License -Version 1.0 (the "License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License on the AMP web site -(http://www.tibet.iteso.mx/Guatemala/). - -Software distributed under the License is distributed on an "AS IS" basis, -WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific terms governing rights and limitations under the -License. - -The Initial Developer of this software is Andres Montano Pellegrini. Portions -created by Andres Montano Pellegrini are Copyright 2001 Andres Montano -Pellegrini. All Rights Reserved. - -Contributor(s): ______________________________________. 
-*/ - -package org.thdl.tib.scanner; - -import org.thdl.util.*; -import org.thdl.tib.text.*; -import java.net.*; -import java.io.*; - -public class DictionaryBreakDown -{ - private static final int TOMERAIDER=1; - private static final int HTML=2; - private static final int SIMPLEBREAK=3; - private int mode; - private BufferedReader in; - private int numberOfFields; - private int numberOfMergedFields; - - private static final int UNIQUE=0; - private static final int MERGE_HEAD=1; - private static final int MERGE_FOLLOWER=2; - - public DictionaryBreakDown(BufferedReader in, int mode) - { - this.mode = mode; - this.in = in; - } - - public static void main (String[] args) throws Exception - { - PrintWriter out; - BufferedReader in=null; - int argNum = args.length, currentArg=0, mode = SIMPLEBREAK; - String option, format=null; - - if (argNum<=currentArg) - { - System.out.println("Syntax: DictionaryBreakDown [-format format] [-tomeraider | -html] input-file"); - return; - } - - while (args[currentArg].charAt(0)=='-') - { - option = args[currentArg].substring(1); - currentArg++; - if (option.equals("format")) - { - format=args[currentArg]; - currentArg++; - } else if (option.equals("tomeraider")) - { - mode=TOMERAIDER; - } else if (option.equals("html")) - { - mode=HTML; - } - } - if (argNum<=currentArg) - { - System.out.println("Syntax error. 
Input file expected."); - return; - } - - in = getBufferedReader(args[currentArg], format); - - new DictionaryBreakDown(in, mode).run(format); - } - - public static BufferedReader getBufferedReader(String s, String format) throws Exception - { - InputStream is; - - if (s.indexOf("http://") >= 0) - is = new BufferedInputStream((new URL(s)).openStream()); - else - is = new FileInputStream(s); - - if (format==null) - return new BufferedReader(new InputStreamReader(is)); - else - return new BufferedReader(new InputStreamReader(is, format)); - - } - - private int[] buildMergeMatrix(String fields[]) - { - int i; - int matrix[] = new int[fields.length]; - boolean sameRoot=false; - String root; - - for (i=0; i1 && s.charAt(0)=='\"' && s.charAt(s.length()-1)=='\"') - return s.substring(1,s.length()-1); - else return s; - } - - private String[] getFields(String linea) - { - int i=0, j, pos; - String fields[] = new String[this.numberOfFields], tokens[]; - - tokens = linea.split("\t"); - for (i=0; i=fields.length) break outAHere; - } - j++; - } - } - return mergedFields; - } - - public void run(String format) throws Exception - { - String linea, fieldNames[], fields[], mergedFieldNames[], mergedFields[], tail; - int i, pos, j; - int mergeMatrix[]; - PrintWriter out[] = null, outOne = null; - SimplifiedLinkedList ll = null, llOdd = null; - SimplifiedListIterator sli; - char ch; - - linea=in.readLine(); - if (linea==null) throw new Exception("File is empty!"); - fieldNames = linea.split("\t"); - numberOfFields = fieldNames.length; - mergeMatrix = buildMergeMatrix(fieldNames); - mergedFieldNames = buildMergedFieldNames(fieldNames, mergeMatrix); - numberOfMergedFields = mergedFieldNames.length; - - switch (mode) - { - case TOMERAIDER: - if (format!=null) outOne = new PrintWriter(new OutputStreamWriter(new FileOutputStream("dict-in-tomeraider-format.txt"), format)); - else outOne = new PrintWriter(new FileOutputStream("dict-in-tomeraider-format.txt")); - ll = new 
SimplifiedLinkedList(); - llOdd = new SimplifiedLinkedList(); - break; - - case HTML: - if (format != null) outOne = new PrintWriter(new OutputStreamWriter(new FileOutputStream("dict-in-tab-format.txt"), format)); - else outOne = new PrintWriter(new FileOutputStream("dict-in-tab-format.txt")); - break; - - case SIMPLEBREAK: - out = new PrintWriter[mergedFieldNames.length-1]; - for (i=0; i0) linea = linea.substring(0,pos).trim(); - pos = linea.indexOf('+'); - if (pos>0) - { - llOdd.addLast(mergedFields); - continue; - } - try - { - ll.addSorted(new SortingTibetanEntry(linea, mergedFields)); - } - catch (Exception e) - { - llOdd.addLast(mergedFields); - } - - break; - case HTML: - tail = null; - - pos = mergedFields[0].indexOf("..."); - - if (pos==0) mergedFields[0] = mergedFields[0].substring(3).trim(); - else - if (pos>0) - { - tail = mergedFields[0].substring(pos + 3).trim(); - mergedFields[0] = mergedFields[0].substring(0,pos-1).trim(); - } - outOne.print(mergedFields[0] + "\t"); - - for (i=1; i<=2; i++) // tenses & sanskrit - { - if (!mergedFields[i].equals("")) - { - if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[i] + "
"); - else outOne.print(mergedFields[i] + "
"); - } - } - - if (!mergedFields[3].equals("")) // english - { - if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[3] + "
"); - else outOne.print("" + mergedFields[3] + "
"); - } - if (!mergedFields[4].equals("")) // english-others - { - if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[4] + "
"); - else outOne.print("" + mergedFields[4] + "
"); - } - - for (i=5; i<=8; i++) - { - if (!mergedFields[i].equals("")) - { - if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[i] + "
"); - else outOne.print(mergedFields[i] + "
"); - } - } - - if (!mergedFields[9].equals("")) // synonyms-tibetan - { - if (tail!=null) outOne.print("... " + tail + ": Synonyms: " + mergedFields[9] + "
"); - else outOne.print("Synonyms: " + mergedFields[9] + "
"); - } - - for (i=10; i<=13; i++) - { - if (!mergedFields[i].equals("")) - { - if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[i] + "
"); - else outOne.print(mergedFields[i] + "
"); - } - } - outOne.println(); - break; - case SIMPLEBREAK: - tail = null; - - pos = mergedFields[0].indexOf("..."); - - if (pos==0) mergedFields[0] = mergedFields[0].substring(3).trim(); - else - if (pos>0) - { - tail = mergedFields[0].substring(pos + 3).trim(); - mergedFields[0] = mergedFields[0].substring(0,pos-1).trim(); - } - - for (i=1; i\nAbout\n"); - outOne.println(TibetanScanner.aboutTomeraider); - - sli = ll.listIterator(); - while (sli.hasNext()) - { - mergedFields = ((SortingTibetanEntry)sli.next()).get(); - outOne.println("\n\n" + mergedFields[0] + "\n"); - if (!mergedFields[1].equals("")) outOne.println(mergedFields[1] + "
"); // tenses - if (!mergedFields[2].equals("")) outOne.println(mergedFields[2] + "
"); // sanskrit - if (!mergedFields[3].equals("")) outOne.println("" + mergedFields[3] + "
"); // english - if (!mergedFields[4].equals("")) outOne.println("" + mergedFields[4] + "
"); // english-others - for (i=5; i<=8; i++) - { - if (!mergedFields[i].equals("")) outOne.println(mergedFields[i] + "
"); - } - if (!mergedFields[9].equals("")) outOne.println("Synonyms: " + mergedFields[9] + "
"); // synonyms-tibetan - - for (i=10; i<=13; i++) - { - if (!mergedFields[i].equals("")) outOne.println(mergedFields[i] + "
"); - } - } - - sli = llOdd.listIterator(); - if (sli.hasNext()) - { - outOne.println("\n\nUnsorted entries start here.\n"); - outOne.println("Because of errors in the entries with regards to:"); - outOne.println("
    "); - outOne.println("
  1. Invalid tibetan"); - outOne.println("
  2. Conversion from Tibetan script to extended wylie"); - outOne.println("
  3. or the sorting algorithm"); - outOne.println("
"); - outOne.println("the following entries could not be sorted. Hopefully these errors will be corrected in future versions."); - } - while (sli.hasNext()) - { - mergedFields = (String[])sli.next(); - outOne.println("\n\n" + mergedFields[0] + "\n"); - if (!mergedFields[1].equals("")) outOne.println(mergedFields[1] + "
"); // tenses - if (!mergedFields[2].equals("")) outOne.println(mergedFields[2] + "
"); // sanskrit - if (!mergedFields[3].equals("")) outOne.println("" + mergedFields[3] + "
"); // english - if (!mergedFields[4].equals("")) outOne.println("" + mergedFields[4] + "
"); // english-others - for (i=5; i<=8; i++) - { - if (!mergedFields[i].equals("")) outOne.println(mergedFields[i] + "
"); - } - if (!mergedFields[9].equals("")) outOne.println("Synonyms: " + mergedFields[9] + "
"); // synonyms-tibetan - - for (i=10; i<=13; i++) - { - if (!mergedFields[i].equals("")) outOne.println(mergedFields[i] + "
"); - } - } - case HTML: - outOne.flush(); // no break above so that both flush. - break; - - case SIMPLEBREAK: - for (i=0; i