/*
The contents of this file are subject to the AMP Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the AMP web site
(http://www.tibet.iteso.mx/Guatemala/).

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.

The Initial Developer of this software is Andres Montano Pellegrini. Portions
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
Pellegrini. All Rights Reserved.

Contributor(s): ______________________________________.
*/
package org.thdl.tib.scanner;

import org.thdl.util.*;
import org.thdl.tib.text.*;
import java.net.*;
import java.io.*;

/**
 * Command-line utility that reads a dictionary whose first line is a
 * tab-separated list of field names and processes it in one of three modes:
 * TOMERAIDER, HTML or SIMPLEBREAK (the default).
 *
 * Usage, as printed by main:
 *   DictionaryBreakDown [-format format] [-tomeraider | -html] input-file
 *
 * NOTE(review): in this copy of the file the private helpers that follow
 * getBufferedReader, and parts of run, are not valid Java; text seems to be
 * missing from them. Restore them from the original source before relying on
 * this class.
 */
public class DictionaryBreakDown
{
    /** Mode chosen by the -tomeraider option. */
    private static final int TOMERAIDER=1;
    /** Mode chosen by the -html option. */
    private static final int HTML=2;
    /** Mode used when neither -tomeraider nor -html is given. */
    private static final int SIMPLEBREAK=3;

    /** One of TOMERAIDER, HTML or SIMPLEBREAK; fixed at construction. */
    private int mode;
    /** Source of the dictionary text; its first line is read by run as the header. */
    private BufferedReader in;
    /** Number of tab-separated field names on the first input line (set in run). */
    private int numberOfFields;
    /** Length of the merged field-name array built in run. */
    private int numberOfMergedFields;

    // Presumably the per-field markers stored in the merge matrix; their use
    // is not visible in the intact part of this file -- TODO confirm.
    private static final int UNIQUE=0;
    private static final int MERGE_HEAD=1;
    private static final int MERGE_FOLLOWER=2;

    /**
     * Creates a processor for the given input.
     *
     * @param in   reader positioned at the start of the dictionary
     * @param mode one of TOMERAIDER, HTML or SIMPLEBREAK
     */
    public DictionaryBreakDown(BufferedReader in, int mode)
    {
        this.mode = mode;
        this.in = in;
    }

    /**
     * Command-line entry point: parses the leading options, opens the input
     * and calls run.
     *
     * Recognized options: "-format X" (X is passed on as the character
     * encoding name), "-tomeraider" and "-html". Any other argument starting
     * with '-' is consumed and ignored, since the if/else chain has no final
     * else. The first argument that does not start with '-' is the input
     * file name or URL.
     *
     * @param args command-line arguments
     * @throws Exception whatever getBufferedReader or run throw
     */
    public static void main (String[] args) throws Exception
    {
        PrintWriter out; // declared but never used in this method
        BufferedReader in=null;
        int argNum = args.length, currentArg=0, mode = SIMPLEBREAK;
        String option, format=null;

        // No arguments at all: print the usage line and stop.
        if (argNum<=currentArg)
        {
            System.out.println("Syntax: DictionaryBreakDown [-format format] [-tomeraider | -html] input-file");
            return;
        }

        // NOTE(review): the loop condition reads args[currentArg] without a
        // bounds check, so a command line made only of options (or ending in
        // "-format") would index past the end of args before the check below
        // is reached; an empty-string argument would make charAt(0) throw.
        while (args[currentArg].charAt(0)=='-')
        {
            option = args[currentArg].substring(1);
            currentArg++;
            if (option.equals("format"))
            {
                // The value of -format is the next argument.
                format=args[currentArg];
                currentArg++;
            }
            else if (option.equals("tomeraider"))
            {
                mode=TOMERAIDER;
            }
            else if (option.equals("html"))
            {
                mode=HTML;
            }
        }

        if (argNum<=currentArg)
        {
            // NOTE(review): the string literal below contains a raw line
            // break in this copy of the file; it is left exactly as found.
            System.out.println("Syntax error. 
Input file expected.");
            return;
        }

        in = getBufferedReader(args[currentArg], format);
        new DictionaryBreakDown(in, mode).run(format);
    }

    /**
     * Opens a buffered character reader on a URL or a local file.
     *
     * The argument is treated as a URL when it contains "http://" at any
     * position (indexOf, not startsWith); otherwise it is opened as a file
     * path. The underlying stream is not closed here.
     *
     * @param s      URL or file name to open
     * @param format character encoding name handed to InputStreamReader, or
     *               null to use the InputStreamReader constructor that takes
     *               no encoding
     * @return a BufferedReader over the opened stream
     * @throws Exception if the URL or file cannot be opened or the encoding
     *                   name is rejected
     */
    public static BufferedReader getBufferedReader(String s, String format) throws Exception
    {
        InputStream is;

        if (s.indexOf("http://") >= 0)
            is = new BufferedInputStream((new URL(s)).openStream());
        else
            is = new FileInputStream(s);

        if (format==null)
            return new BufferedReader(new InputStreamReader(is));
        else
            return new BufferedReader(new InputStreamReader(is, format));
    }

    /*
     * NOTE(review): the next line is reproduced exactly as found. It begins as
     * buildMergeMatrix but runs straight into what look like the remains of
     * other helpers (a quote-stripping routine, getFields, and the end of a
     * method returning mergedFields). Loop headers such as "for (i=0; i1 &&"
     * are not valid Java, so text appears to have been lost -- possibly
     * everything between a '<' and a later '>'. buildMergedFieldNames and
     * getMergedFields are called from run but have no intact definition here.
     * Restore from the original source; do not try to repair by guessing.
     */
    private int[] buildMergeMatrix(String fields[]) { int i; int matrix[] = new int[fields.length]; boolean sameRoot=false; String root; for (i=0; i1 && s.charAt(0)=='\"' && s.charAt(s.length()-1)=='\"') return s.substring(1,s.length()-1); else return s; } private String[] getFields(String linea) { int i=0, j, pos; String fields[] = new String[this.numberOfFields], tokens[]; tokens = linea.split("\t"); for (i=0; i=fields.length) break outAHere; } j++; } } return mergedFields; }

    /*
     * Start of run, reproduced exactly as found; the method continues on the
     * following lines of the file. The part visible here reads the first
     * input line (throwing if there is none), splits it on tabs into field
     * names, builds the merge matrix and merged field names, and then begins
     * opening output according to mode: "dict-in-tomeraider-format.txt" for
     * TOMERAIDER; the HTML case is cut off at the end of this line.
     */
    public void run(String format) throws Exception { String linea, fieldNames[], fields[], mergedFieldNames[], mergedFields[], tail; int i, pos, j; int mergeMatrix[]; PrintWriter out[] = null, outOne = null; SimplifiedLinkedList ll = null, llOdd = null; SimplifiedListIterator sli; char ch; linea=in.readLine(); if (linea==null) throw new Exception("File is empty!"); fieldNames = linea.split("\t"); numberOfFields = fieldNames.length; mergeMatrix = buildMergeMatrix(fieldNames); mergedFieldNames = buildMergedFieldNames(fieldNames, mergeMatrix); numberOfMergedFields = mergedFieldNames.length; switch (mode) { case TOMERAIDER: if (format!=null) outOne = new PrintWriter(new OutputStreamWriter(new FileOutputStream("dict-in-tomeraider-format.txt"), format)); else outOne = new PrintWriter(new FileOutputStream("dict-in-tomeraider-format.txt")); ll = new SimplifiedLinkedList(); llOdd = new SimplifiedLinkedList(); break; case HTML: if (format != null) outOne = new PrintWriter(new OutputStreamWriter(new 
FileOutputStream("dict-in-tab-format.txt"), format)); else outOne = new PrintWriter(new FileOutputStream("dict-in-tab-format.txt")); break; case SIMPLEBREAK: out = new PrintWriter[mergedFieldNames.length-1]; for (i=0; i0) linea = linea.substring(0,pos).trim(); pos = linea.indexOf('+'); if (pos>0) { llOdd.addLast(mergedFields); continue; } try { ll.addSorted(new SortingTibetanEntry(linea, mergedFields)); } catch (Exception e) { llOdd.addLast(mergedFields); } break; case HTML: tail = null; pos = mergedFields[0].indexOf("..."); if (pos==0) mergedFields[0] = mergedFields[0].substring(3).trim(); else if (pos>0) { tail = mergedFields[0].substring(pos + 3).trim(); mergedFields[0] = mergedFields[0].substring(0,pos-1).trim(); } outOne.print(mergedFields[0] + "\t"); for (i=1; i<=2; i++) // tenses & sanskrit { if (!mergedFields[i].equals("")) { if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[i] + "
"); else outOne.print(mergedFields[i] + "
"); } } if (!mergedFields[3].equals("")) // english { if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[3] + "
"); else outOne.print("" + mergedFields[3] + "
"); } if (!mergedFields[4].equals("")) // english-others { if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[4] + "
"); else outOne.print("" + mergedFields[4] + "
"); } for (i=5; i<=8; i++) { if (!mergedFields[i].equals("")) { if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[i] + "
"); else outOne.print(mergedFields[i] + "
"); } } if (!mergedFields[9].equals("")) // synonyms-tibetan { if (tail!=null) outOne.print("... " + tail + ": Synonyms: " + mergedFields[9] + "
"); else outOne.print("Synonyms: " + mergedFields[9] + "
"); } for (i=10; i<=13; i++) { if (!mergedFields[i].equals("")) { if (tail!=null) outOne.print("... " + tail + ": " + mergedFields[i] + "
"); else outOne.print(mergedFields[i] + "
"); } } outOne.println(); break; case SIMPLEBREAK: tail = null; pos = mergedFields[0].indexOf("..."); if (pos==0) mergedFields[0] = mergedFields[0].substring(3).trim(); else if (pos>0) { tail = mergedFields[0].substring(pos + 3).trim(); mergedFields[0] = mergedFields[0].substring(0,pos-1).trim(); } for (i=1; i\nAbout\n"); outOne.println(TibetanScanner.aboutTomeraider); sli = ll.listIterator(); while (sli.hasNext()) { mergedFields = ((SortingTibetanEntry)sli.next()).get(); outOne.println("\n\n" + mergedFields[0] + "\n"); if (!mergedFields[1].equals("")) outOne.println(mergedFields[1] + "
"); // tenses if (!mergedFields[2].equals("")) outOne.println(mergedFields[2] + "
"); // sanskrit if (!mergedFields[3].equals("")) outOne.println("" + mergedFields[3] + "
"); // english if (!mergedFields[4].equals("")) outOne.println("" + mergedFields[4] + "
"); // english-others for (i=5; i<=8; i++) { if (!mergedFields[i].equals("")) outOne.println(mergedFields[i] + "
"); } if (!mergedFields[9].equals("")) outOne.println("Synonyms: " + mergedFields[9] + "
"); // synonyms-tibetan for (i=10; i<=13; i++) { if (!mergedFields[i].equals("")) outOne.println(mergedFields[i] + "
"); } } sli = llOdd.listIterator(); if (sli.hasNext()) { outOne.println("\n\nUnsorted entries start here.\n"); outOne.println("Because of errors in the entries with regards to:"); outOne.println("
    "); outOne.println("
  1. Invalid tibetan"); outOne.println("
  2. Conversion from Tibetan script to extended wylie"); outOne.println("
  3. or the sorting algorithm"); outOne.println("
"); outOne.println("the following entries could not be sorted. Hopefully these errors will be corrected in future versions."); } while (sli.hasNext()) { mergedFields = (String[])sli.next(); outOne.println("\n\n" + mergedFields[0] + "\n"); if (!mergedFields[1].equals("")) outOne.println(mergedFields[1] + "
"); // tenses if (!mergedFields[2].equals("")) outOne.println(mergedFields[2] + "
"); // sanskrit if (!mergedFields[3].equals("")) outOne.println("" + mergedFields[3] + "
"); // english if (!mergedFields[4].equals("")) outOne.println("" + mergedFields[4] + "
"); // english-others for (i=5; i<=8; i++) { if (!mergedFields[i].equals("")) outOne.println(mergedFields[i] + "
"); } if (!mergedFields[9].equals("")) outOne.println("Synonyms: " + mergedFields[9] + "
"); // synonyms-tibetan for (i=10; i<=13; i++) { if (!mergedFields[i].equals("")) outOne.println(mergedFields[i] + "
"); } } case HTML: outOne.flush(); // no break above so that both flush. break; case SIMPLEBREAK: for (i=0; i