diff --git a/source/org/thdl/tib/scanner/AcipToTab.java b/source/org/thdl/tib/scanner/AcipToTab.java index 9f9130f..1255abb 100644 --- a/source/org/thdl/tib/scanner/AcipToTab.java +++ b/source/org/thdl/tib/scanner/AcipToTab.java @@ -80,10 +80,6 @@ class AcipToTab if (temp.length()>0) { currentPage=Integer.parseInt(temp); - if (currentPage==3141) - { - System.out.println("Hello!"); - } } if (marker0) marker = marker2; + s1 = Manipulate.deleteQuotes(entrada.substring(0,marker).trim()); - s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length()).trim()); - if (!s2.equals("")) + s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length())).trim(); + + if (Manipulate.isMeaningful(s2)) { if (currentLine%5000==0) { @@ -527,8 +544,16 @@ public class BinaryFileGenerator extends SimplifiedLinkedList Link link, newLink; BinaryFileGenerator ultimo; String firstSillable; - int marker = word.indexOf(" "), comp; + int marker, comp; + while (true) + { + marker = Manipulate.indexOfExtendedEndOfSyllableMark(word); + if (marker==0) word = word.substring(1); + else if (marker==word.length()-1) word = word.substring(0,word.length()-1); + else break; + } + if (marker<0) firstSillable = word; else firstSillable = word.substring(0,marker); diff --git a/source/org/thdl/tib/scanner/LocalTibetanScanner.java b/source/org/thdl/tib/scanner/LocalTibetanScanner.java index 4d57b4e..d422fd7 100644 --- a/source/org/thdl/tib/scanner/LocalTibetanScanner.java +++ b/source/org/thdl/tib/scanner/LocalTibetanScanner.java @@ -35,8 +35,6 @@ public class LocalTibetanScanner extends TibetanScanner private SyllableListTree raiz, silActual, lastCompSil, silAnterior; private String wordActual, lastCompWord; private Vector floatingSil; - private static String endOfParagraphMarks = "/;|!:[]^@#$%=<>(){}"; - private static String endOfSyllableMarks = " _\t"; static { @@ -295,11 +293,6 @@ public class LocalTibetanScanner extends TibetanScanner } } - private boolean isEndOfSyllable(int ch) - { - return (endOfSyllableMarks.indexOf(ch)>-1); - } - public void scanLine(String linea) { int init = 0, fin; @@ -325,7 +318,7 @@ outAHere: if (init>=linea.length()) break outAHere; ch = linea.charAt(init); - if (endOfParagraphMarks.indexOf(ch)>=0) + if (Manipulate.isPunctuationMark(ch)) { if (doNotFinishUp) { @@ -334,7 +327,7 @@ outAHere: } wordList.addLast(new PunctuationMark(ch)); } - else if (endOfSyllableMarks.indexOf(ch)<0) + else if (!Manipulate.isEndOfSyllableMark(ch)) break; init++; @@ -350,12 +343,12 @@ outAHere: while (fin < linea.length()) { ch = linea.charAt(fin); - if (endOfParagraphMarks.indexOf(ch)>=0) + if (Manipulate.isPunctuationMark(ch)) { doNotFinishUp = false; break; } - else if (endOfSyllableMarks.indexOf(ch)>=0) + else if (Manipulate.isEndOfSyllableMark(ch)) { break; } diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java index b0845bd..5e2c7fd 100644 --- a/source/org/thdl/tib/scanner/Manipulate.java +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -28,6 +28,11 @@ import org.thdl.util.*; public class Manipulate { + private static String endOfParagraphMarks = "/;|!:^@#$%="; + private static String bracketMarks = "<>(){}[]"; + private static String endOfSyllableMarks = " _\t"; + private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks; + /* public static String[] parseFields (String s, char delimiter) { int pos; @@ -43,7 +48,52 @@ public class Manipulate ll.addLast(s.trim()); return ll.toStringArray(); - }*/ + }*/ + + public static int indexOfAnyChar(String str, String chars) + { + int i; + for (i=0; i=0) + return i; + } + + return -1; + } + + public static int indexOfExtendedEndOfSyllableMark(String word) + { + return indexOfAnyChar(word, allStopMarkers); + } + + public static int indexOfBracketMarks(String word) + { + return indexOfAnyChar(word, bracketMarks); + } + + public static boolean isPunctuationMark(int ch) + { + return endOfParagraphMarks.indexOf(ch)>=0 || bracketMarks.indexOf(ch)>=0; + } + + public static boolean isEndOfParagraphMark(int ch) + { + return endOfParagraphMarks.indexOf(ch)>=0; + } + + public static boolean isEndOfSyllableMark(int ch) + { + return endOfSyllableMarks.indexOf(ch)>=0; + } + + public static boolean isMeaningful(String s) + { + for (int i=0; i1 && !Character.isLetter(nuevaLinea.charAt(i-2)) || chN == 'a' && (i+2==len || !Character.isLetter(nuevaLinea.charAt(i+2))))) + { + nuevaLinea = nuevaLinea.substring(0,i-1) + Character.toUpperCase(chN) + nuevaLinea.substring(i+2); + len-=2; + } } } - break; - case 'a': - if ((i2) { - if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"')) - return s.substring(1,length-1); + if ((s.charAt(0)=='\"') && (s.charAt(length-1)=='\"')) + return s.substring(1,length-1); + + do + { + pos = s.indexOf("\"\""); + if (pos<0) break; + s = Manipulate.deleteSubstring(s, pos, pos+1); + } while (true); } + return s; } + + /** Syntax: java Manipulate [word-file] < source-dic-entries > dest-dic-entries